diff options
Diffstat (limited to 'native/dicttoolkit/src')
23 files changed, 1386 insertions, 0 deletions
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp
new file mode 100644
index 000000000..bf6830686
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/diff_executor.h"
+
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const DiffExecutor::COMMAND_NAME = "diff";
+
+// Entry point of the "diff" command. Currently a stub: it only reports on stderr that the
+// command is not implemented and ignores argc/argv.
+// NOTE(review): the stub still returns 0; confirm whether callers expect a non-zero status
+// for a command that did nothing.
+/* static */ int DiffExecutor::run(const int argc, char **argv) {
+    fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+// Prints a "*** diff" banner on stdout, then lets the arguments parser print the usage text.
+/* static */ void DiffExecutor::printUsage() {
+    printf("*** %s\n", COMMAND_NAME);
+    getArgumentsParser().printUsage(COMMAND_NAME, "Shows differences between two dictionaries.");
+}
+
+// Builds the parser describing this command: one switch option ("p") and two single
+// arguments ("dict1", "dict2"). A new parser is built on every call.
+/* static */ const ArgumentsParser DiffExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately not const: std::move() on a const vector cannot move and would copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict1", "dictionary file"),
+        ArgumentSpec::singleArgument("dict2", "dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git 
a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h
new file mode 100644
index 000000000..f92ae49d5
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static-only holder for the "diff" command: its name, its entry point and its usage text.
+class DiffExecutor final {
+ public:
+    // Name of the command; defined as "diff" in diff_executor.cpp.
+    static const char *const COMMAND_NAME;
+
+    // Entry point of the command. Returns an int status.
+    static int run(const int argc, char **argv);
+    // Prints the usage text of this command to stdout.
+    static void printUsage();
+    // Returns a newly built parser describing the options and arguments of this command.
+    static const ArgumentsParser getArgumentsParser();
+
+ private:
+    // NOTE(review): macro is defined outside this file; presumably it forbids instantiation.
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp
new file mode 100644
index 000000000..b3d273b4e
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/header_executor.h"
+
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const HeaderExecutor::COMMAND_NAME = "header";
+
+// Entry point of the "header" command. Currently a stub: it only reports on stderr that the
+// command is not implemented and ignores argc/argv.
+// NOTE(review): the stub still returns 0; confirm whether callers expect a non-zero status
+// for a command that did nothing.
+/* static */ int HeaderExecutor::run(const int argc, char **argv) {
+    fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+// Prints a "*** header" banner on stdout, then lets the arguments parser print the usage text.
+/* static */ void HeaderExecutor::printUsage() {
+    printf("*** %s\n", COMMAND_NAME);
+    getArgumentsParser().printUsage(COMMAND_NAME,
+            "Prints the header contents of a dictionary file.");
+}
+
+// Builds the parser describing this command: one switch option ("p") and one single
+// argument ("dict"). A new parser is built on every call.
+/* static */ const ArgumentsParser HeaderExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately not const: std::move() on a const vector cannot move and would copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        // The description is about the argument itself, matching the other commands' "dict*"
+        // arguments; the command description is passed separately in printUsage().
+        ArgumentSpec::singleArgument("dict", "dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h
new file mode 100644
index 000000000..44cc9cfc4
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static-only holder for the "header" command: its name, its entry point and its usage text.
+class HeaderExecutor final {
+ public:
+    // Name of the command; defined as "header" in header_executor.cpp.
+    static const char *const COMMAND_NAME;
+
+    // Entry point of the command. Returns an int status.
+    static int run(const int argc, char **argv);
+    // Prints the usage text of this command to stdout.
+    static void printUsage();
+    // Returns a newly built parser describing the options and arguments of this command.
+    static const ArgumentsParser getArgumentsParser();
+
+ private:
+    // NOTE(review): macro is defined outside this file; presumably it forbids instantiation.
+    DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp
new file mode 100644
index 000000000..bd29a5b16
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/help_executor.h"
+
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+#include "utils/command_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const HelpExecutor::COMMAND_NAME = "help";
+
+// Entry point of the "help" command: prints a heading followed by the usage text of every
+// command, in the order listed below. argc/argv are not used. Always returns 0.
+/* static */ int HelpExecutor::run(const int argc, char **argv) {
+    using UsagePrinter = std::function<void(void)>;
+    // One printer per command; the order of this list is the order of the output.
+    const std::vector<UsagePrinter> usagePrinters = {
+        DiffExecutor::printUsage,
+        HeaderExecutor::printUsage,
+        InfoExecutor::printUsage,
+        MakedictExecutor::printUsage,
+        printUsage
+    };
+
+    printf("Available commands:\n\n");
+    for (const UsagePrinter &printCommandUsage : usagePrinters) {
+        printCommandUsage();
+    }
+    return 0;
+}
+
+// Prints the banner, usage line and description of the "help" command itself to stdout.
+/* static */ void HelpExecutor::printUsage() {
+    printf("*** %s\n", COMMAND_NAME);
+    printf("Usage: %s\n", COMMAND_NAME);
+    printf("Show this help list.\n\n");
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h
new file mode 100644
index 000000000..280610eb9
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static-only holder for the "help" command: its name, its entry point and its usage text.
+class HelpExecutor final {
+ public:
+    // Name of the command; defined as "help" in help_executor.cpp.
+    static const char *const COMMAND_NAME;
+
+    // Entry point of the command. Returns an int status.
+    static int run(const int argc, char **argv);
+    // Prints the usage text of this command to stdout.
+    static void printUsage();
+
+ private:
+    // NOTE(review): macro is defined outside this file; presumably it forbids instantiation.
+    DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp
new file mode 100644
index 000000000..351da4aff
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/info_executor.h"
+
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const InfoExecutor::COMMAND_NAME = "info";
+
+// Entry point of the "info" command. Currently a stub: it only reports on stderr that the
+// command is not implemented and ignores argc/argv.
+// NOTE(review): the stub still returns 0; confirm whether callers expect a non-zero status
+// for a command that did nothing.
+/* static */ int InfoExecutor::run(const int argc, char **argv) {
+    fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+// Prints a "*** info" banner on stdout, then lets the arguments parser print the usage text.
+/* static */ void InfoExecutor::printUsage() {
+    printf("*** %s\n", COMMAND_NAME);
+    getArgumentsParser().printUsage(COMMAND_NAME,
+            "Prints various information about a dictionary file.");
+}
+
+// Builds the parser describing this command: one switch option ("p"), one single argument
+// ("dict") and a variable-length "word" argument that may be omitted (minCount 0) and has no
+// upper bound (UNLIMITED_COUNT). A new parser is built on every call.
+/* static */ const ArgumentsParser InfoExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately not const: std::move() on a const vector cannot move and would copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict", "dictionary file name"),
+        ArgumentSpec::variableLengthArguments("word", 0 /* minCount */,
+                ArgumentSpec::UNLIMITED_COUNT, "word to show information")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h
new file mode 100644
index 000000000..d4106d59f
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static-only holder for the "info" command: its name, its entry point and its usage text.
+class InfoExecutor final {
+ public:
+    // Name of the command; defined as "info" in info_executor.cpp.
+    static const char *const COMMAND_NAME;
+
+    // Entry point of the command. Returns an int status.
+    static int run(const int argc, char **argv);
+    // Prints the usage text of this command to stdout.
+    static void printUsage();
+    // Returns a newly built parser describing the options and arguments of this command.
+    static const ArgumentsParser getArgumentsParser();
+
+ private:
+    // NOTE(review): macro is defined outside this file; presumably it forbids instantiation.
+    DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
new file mode 100644
index 000000000..8a84e8069
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/makedict_executor.h"
+
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const MakedictExecutor::COMMAND_NAME = "makedict";
+
+// Entry point of the "makedict" command. Currently a stub: it only reports on stderr that the
+// command is not implemented and ignores argc/argv.
+// NOTE(review): the stub still returns 0; confirm whether callers expect a non-zero status
+// for a command that did nothing.
+/* static */ int MakedictExecutor::run(const int argc, char **argv) {
+    fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+// Prints a "*** makedict" banner on stdout, then lets the arguments parser print the usage
+// text together with the multi-line command description.
+/* static */ void MakedictExecutor::printUsage() {
+    printf("*** %s\n", COMMAND_NAME);
+    getArgumentsParser().printUsage(COMMAND_NAME,
+            "Converts a source dictionary file to one or several outputs.\n"
+            "Source can be a binary dictionary file or a combined format file.\n"
+            "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.");
+}
+
+// Builds the parser describing this command: two key-value options ("o" and "t") and two
+// single arguments ("src_dict", "dest_dict"). A new parser is built on every call.
+// NOTE(review): the second keyValueOption() argument ("2", "off") looks like the default
+// value; confirm against OptionSpec, which is declared elsewhere.
+/* static */ const ArgumentsParser MakedictExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["o"] = OptionSpec::keyValueOption("format", "2",
+            "output format version: 2/4/combined");
+    optionSpecs["t"] = OptionSpec::keyValueOption("mode", "off",
+            "code point table switch: on/off/auto");
+
+    // Deliberately not const: std::move() on a const vector cannot move and would copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("src_dict", "source dictionary file"),
+        ArgumentSpec::singleArgument("dest_dict", "output dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h
new file mode 100644
index 000000000..c3de977a3
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static-only holder for the "makedict" command: its name, its entry point and its usage text.
+class MakedictExecutor final {
+ public:
+    // Name of the command; defined as "makedict" in makedict_executor.cpp.
+    static const char *const COMMAND_NAME;
+
+    // Entry point of the command. Returns an int status.
+    static int run(const int argc, char **argv);
+    // Prints the usage text of this command to stdout.
+    static void printUsage();
+    // Returns a newly built parser describing the options and arguments of this command.
+    static const ArgumentsParser getArgumentsParser();
+
+ private:
+    // NOTE(review): macro is defined outside this file; presumably it forbids instantiation.
+    DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h
new file mode 100644
index 000000000..dbaae0ca0
--- /dev/null
+++ b/native/dicttoolkit/src/dict_toolkit_defines.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DEFINES_H
+#define LATINIME_DICT_TOOLKIT_DEFINES_H
+
+// Common definitions for the dictionary toolkit. Pulls in the shared "defines.h".
+#include "defines.h"
+
+// NOTE(review): presumably the minimum argc the tool accepts (program name + command name);
+// the use site is not part of this file, so confirm there.
+#define MIN_ARG_COUNT 2
+
+#endif // LATINIME_DICT_TOOLKIT_DEFINES_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
new file mode 100644
index 000000000..af28131cf
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include <cstddef>
+#include <memory>
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Adds the word described by wordProperty to the trie.
+// Returns false when the word is empty, longer than MAX_WORD_LENGTH, or already present.
+bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
+    const CodePointArrayView codePoints = wordProperty.getCodePoints();
+    if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
+        return false;
+    }
+    return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
+}
+
+// Inserts codePoints (which must not be empty) below ptNodeArray, splitting an existing
+// ptNode when the new word shares only a prefix with it.
+// Siblings are kept ordered by descending first code point: nodes with a greater first code
+// point are skipped, and the new node is inserted in front of the first smaller one.
+// Returns false only when a terminal node for exactly these code points already exists.
+bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
+        const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
+    auto ptNodeList = ptNodeArray.getMutablePtNodeList();
+    auto ptNodeIt = ptNodeList->begin();
+    for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
+        const auto &ptNode = *ptNodeIt;
+        const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+        if (codePoints[0] < ptNodeCodePoints[0]) {
+            continue;
+        }
+        if (codePoints[0] > ptNodeCodePoints[0]) {
+            break;
+        }
+        // First code points are equal; find the length of the common prefix.
+        size_t i = 1;
+        for (; i < codePoints.size(); ++i) {
+            if (i >= ptNodeCodePoints.size()) {
+                // Add new child.
+                return addWordInner(codePoints.skip(i), wordProperty,
+                        ptNode->getChildrenPtNodeArray());
+            }
+            if (codePoints[i] != ptNodeCodePoints[i]) {
+                break;
+            }
+        }
+        if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
+            // All code points matched.
+            if (ptNode->getWordProperty()) {
+                // Adding the same word multiple times is not supported.
+                return false;
+            }
+            // ptNodes are immutable, so replace the non-terminal node by a terminal copy.
+            // The replacement is inserted before the old node is erased because ptNode is a
+            // reference into the list element that erase() destroys.
+            ptNodeList->insert(ptNodeIt,
+                    std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
+            ptNodeList->erase(ptNodeIt);
+            return true;
+        }
+        // The (i+1)-th elements are different.
+        // Create and Add new parent ptNode for the common part.
+        // The parent is terminal when the new word is exactly the common part.
+        auto newPtNode = codePoints.size() == i
+                ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
+                : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
+        ptNodeList->insert(ptNodeIt, newPtNode);
+        OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
+                newPtNode->getChildrenPtNodeArray();
+        // Add new child for the existing ptNode.
+        childrenPtNodeArray.getMutablePtNodeList()->push_back(
+                std::make_shared<OffdeviceIntermediateDictPtNode>(
+                        ptNodeCodePoints.skip(i), *ptNode));
+        // ptNode and ptNodeCodePoints must not be used after this erase().
+        ptNodeList->erase(ptNodeIt);
+        if (codePoints.size() != i) {
+            // Add a child for the new word.
+            return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
+        }
+        return true;
+    }
+    // No sibling starts with codePoints[0]; ptNodeIt is the insertion position.
+    ptNodeList->insert(ptNodeIt,
+            std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
+    return true;
+}
+
+// Looks up the word made of codePoints.
+// Returns the WordProperty stored on the matching terminal ptNode, or nullptr when the word
+// is not in the dictionary (including when it only matches a non-terminal ptNode).
+const WordProperty *OffdeviceIntermediateDict::getWordProperty(
+        const CodePointArrayView codePoints) const {
+    const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
+    for (size_t i = 0; i < codePoints.size();) {
+        bool foundNext = false;
+        // Bind by reference to avoid copying a shared_ptr for every visited sibling.
+        for (const auto &ptNode : ptNodeArray->getPtNodeList()) {
+            const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+            if (codePoints[i] < ptNodeCodePoints[0]) {
+                continue;
+            }
+            // Compare against the number of code points that are still unmatched, not against
+            // the whole word: otherwise codePoints[i + j] below can read past the end of the
+            // word once i > 0. The subtraction cannot wrap because i < codePoints.size().
+            if (codePoints[i] > ptNodeCodePoints[0]
+                    || codePoints.size() - i < ptNodeCodePoints.size()) {
+                return nullptr;
+            }
+            for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
+                if (codePoints[i + j] != ptNodeCodePoints[j]) {
+                    return nullptr;
+                }
+            }
+            i += ptNodeCodePoints.size();
+            if (i == codePoints.size()) {
+                return ptNode->getWordProperty();
+            }
+            // Descend into the children of the matched ptNode.
+            ptNodeArray = &ptNode->getChildrenPtNodeArray();
+            foundNext = true;
+            break;
+        }
+        if (!foundNext) {
+            break;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h 
b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
new file mode 100644
index 000000000..13d26ba91
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * In-memory patricia trie to represent a dictionary.
+ *
+ * Holds a copy of the header it was created with and the root ptNode array of the trie.
+ */
+class OffdeviceIntermediateDict final {
+ public:
+    // Creates an empty dictionary; the given header is copied.
+    OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header)
+            : mHeader(header), mRootPtNodeArray() {}
+
+    // Adds a word. Returns false when the word is empty, longer than MAX_WORD_LENGTH, or
+    // already in the dictionary.
+    bool addWord(const WordProperty &wordProperty);
+    // Returns the property of the given word, or nullptr when the word is not found.
+    // The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
+    const WordProperty *getWordProperty(const CodePointArrayView codePoints) const;
+    const OffdeviceIntermediateDictHeader &getHeader() const { return mHeader; }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
+
+    const OffdeviceIntermediateDictHeader mHeader;
+    OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
+
+    // Recursive worker of addWord(): inserts codePoints below ptNodeArray.
+    bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
+            OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
new file mode 100644
index 000000000..440627a79
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+
+#include <map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Immutable holder of the header attributes of an OffdeviceIntermediateDict.
+// No accessor for the attributes is declared here yet.
+class OffdeviceIntermediateDictHeader final {
+ public:
+    // Ordered map from an int sequence to an int sequence.
+    // NOTE(review): presumably both are code point sequences (attribute key -> value);
+    // confirm against the code that fills the map.
+    using AttributeMap = std::map<std::vector<int>, std::vector<int>>;
+
+    // Stores a copy of attributesMap.
+    OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap)
+            : mAttributeMap(attributesMap) {}
+
+ private:
+    DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader);
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader);
+
+    const AttributeMap mAttributeMap;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
new file mode 100644
index 000000000..721ccd778
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+
+#include <memory>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// A node of the in-memory patricia trie. It owns its code points, the array of its children
+// and, for terminal nodes only, a WordProperty. Code points and word property are fixed at
+// construction; "modifying" a node is done by building a replacement from an existing node.
+class OffdeviceIntermediateDictPtNode final {
+ public:
+    // Non-terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(nullptr) {}
+
+    // Terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const WordProperty &wordProperty)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Replacing PtNodeCodePoints.
+    // Copies the children list and the word property of ptNode. ptNode may be non-terminal
+    // (this happens when a non-terminal node is split), in which case the new node is
+    // non-terminal as well; the null word property must not be dereferenced.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(ptNode.mWordProperty
+                      ? new WordProperty(*ptNode.mWordProperty) : nullptr) {}
+
+    // Replacing WordProperty.
+    // Copies the code points and the children list of ptNode; the result is always terminal.
+    OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Returns the word property of a terminal node, or nullptr for a non-terminal node.
+    // The pointer is owned by this node.
+    const WordProperty *getWordProperty() const {
+        return mWordProperty.get();
+    }
+
+    // Returns a view of the code points held by this node.
+    const CodePointArrayView getPtNodeCodePoints() const {
+        return CodePointArrayView(mPtNodeCodePoints);
+    }
+
+    // Returns the mutable array of children of this node.
+    OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
+        return mChildrenPtNodeArray;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
+
+    const std::vector<int> mPtNodeCodePoints;
+    OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
+    // Null for non-terminal nodes.
+    const std::unique_ptr<WordProperty> mWordProperty;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
new file mode 100644
index 000000000..f87456ce0
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H + +#include <list> +#include <memory> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode; + +class OffdeviceIntermediateDictPtNodeArray final { + public: + const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const { + return mPtNodes; + } + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() { + return &mPtNodes; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray); + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H diff --git a/native/dicttoolkit/src/utils/arguments_and_options.h b/native/dicttoolkit/src/utils/arguments_and_options.h new file mode 100644 index 000000000..d8f5985e5 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_and_options.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class ArgumentsAndOptions { + public: + ArgumentsAndOptions() : mIsValid(false), mOptions(), mArguments() {} + + ArgumentsAndOptions(std::unordered_map<std::string, std::string> &&options, + std::unordered_map<std::string, std::vector<std::string>> &&arguments) + : mIsValid(true), mOptions(std::move(options)), mArguments(std::move(arguments)) {} + + bool isValid() const { + return mIsValid; + } + + bool hasOption(const std::string &optionName) const { + return mOptions.find(optionName) != mOptions.end(); + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsAndOptions); + + const bool mIsValid; + const std::unordered_map<std::string, std::string> mOptions; + const std::unordered_map<std::string, std::vector<std::string>> mArguments; +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H diff --git a/native/dicttoolkit/src/utils/arguments_parser.cpp b/native/dicttoolkit/src/utils/arguments_parser.cpp new file mode 100644 index 000000000..039dae35b --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +const int ArgumentSpec::UNLIMITED_COUNT = -1; + +bool ArgumentsParser::validateSpecs() const { + for (size_t i = 0; i < mArgumentSpecs.size() ; ++i) { + if (mArgumentSpecs[i].getMinCount() != mArgumentSpecs[i].getMaxCount() + && i != mArgumentSpecs.size() - 1) { + AKLOGE("Variable length argument must be at the end."); + return false; + } + } + return true; +} + +void ArgumentsParser::printUsage(const std::string &commandName, + const std::string &description) const { + printf("Usage: %s", commandName.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" [-%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("]"); + } + for (const auto &argSpec : mArgumentSpecs) { + if (argSpec.getMinCount() == 0 && argSpec.getMaxCount() == 1) { + printf(" [<%s>]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1 && argSpec.getMaxCount() == 1) { + printf(" <%s>", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 0) { + printf(" [<%s>...]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1) { + printf(" <%s>...", argSpec.getName().c_str()); + } + } + printf("\n%s\n\n", description.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" -%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("\t\t\t%s", spec.getDescription().c_str()); + if (spec.takeValue() && !spec.getDefaultValue().empty()) { + printf("\tdefault: %s", spec.getDefaultValue().c_str()); + } + printf("\n"); + } + for (const auto &argSpec : mArgumentSpecs) 
{ + printf(" <%s>\t\t\t%s\n", argSpec.getName().c_str(), argSpec.getDescription().c_str()); + } + printf("\n\n"); +} + +const ArgumentsAndOptions ArgumentsParser::parseArguments(const int argc, char **argv) const { + // TODO: Implement + return ArgumentsAndOptions(); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/arguments_parser.h b/native/dicttoolkit/src/utils/arguments_parser.h new file mode 100644 index 000000000..be2dd8749 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/arguments_and_options.h" + +namespace latinime { +namespace dicttoolkit { + +class OptionSpec { + public: + // Default constructor and assignment operator is enabled to be used with std::unordered_map. 
+ OptionSpec() = default; + OptionSpec &operator=(const OptionSpec &) = default; + + static OptionSpec keyValueOption(const std::string &valueName, const std::string &defaultValue, + const std::string &description) { + return OptionSpec(true /* takeValue */, valueName, defaultValue, description); + } + + static OptionSpec switchOption(const std::string &description) { + return OptionSpec(false /* takeValue */, "" /* valueName */, "" /* defaultValue */, + description); + } + + bool takeValue() const { return mTakeValue; } + const std::string &getValueName() const { return mValueName; } + const std::string &getDefaultValue() const { return mDefaultValue; } + const std::string &getDescription() const { return mDescription; } + + private: + OptionSpec(const bool takeValue, const std::string &valueName, const std::string &defaultValue, + const std::string &description) + : mTakeValue(takeValue), mValueName(valueName), mDefaultValue(defaultValue), + mDescription(description) {} + + // Whether the option have to be used with a value or just a switch. + // e.g. 'f' in "command -f /path/to/file" is mTakeValue == true. + // 'f' in "command -f -t" is mTakeValue == false. + bool mTakeValue; + // Name of the value used to show usage. 
+ std::string mValueName; + std::string mDefaultValue; + std::string mDescription; +}; + +class ArgumentSpec { + public: + static const int UNLIMITED_COUNT; + + static ArgumentSpec singleArgument(const std::string &name, const std::string &description) { + return ArgumentSpec(name, 1 /* minCount */, 1 /* maxCount */, description); + } + + static ArgumentSpec variableLengthArguments(const std::string &name, const int minCount, + const int maxCount, const std::string &description) { + return ArgumentSpec(name, minCount, maxCount, description); + } + + const std::string &getName() const { return mName; } + int getMinCount() const { return mMinCount; } + int getMaxCount() const { return mMaxCount; } + const std::string &getDescription() const { return mDescription; } + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentSpec); + + ArgumentSpec(const std::string &name, const int minCount, const int maxCount, + const std::string &description) + : mName(name), mMinCount(minCount), mMaxCount(maxCount), mDescription(description) {} + + const std::string mName; + const int mMinCount; + const int mMaxCount; + const std::string mDescription; +}; + +class ArgumentsParser { + public: + ArgumentsParser(std::unordered_map<std::string, OptionSpec> &&optionSpecs, + std::vector<ArgumentSpec> &&argumentSpecs) + : mOptionSpecs(std::move(optionSpecs)), mArgumentSpecs(std::move(argumentSpecs)) {} + + const ArgumentsAndOptions parseArguments(const int argc, char **argv) const; + bool validateSpecs() const; + void printUsage(const std::string &commandName, const std::string &description) const; + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentsParser); + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsParser); + + const std::unordered_map<std::string, OptionSpec> mOptionSpecs; + const std::vector<ArgumentSpec> mArgumentSpecs; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H diff --git a/native/dicttoolkit/src/utils/command_utils.cpp 
b/native/dicttoolkit/src/utils/command_utils.cpp new file mode 100644 index 000000000..34196425e --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <cstdio> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/help_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" + +namespace latinime { +namespace dicttoolkit { + +/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) { + if (commandName == InfoExecutor::COMMAND_NAME) { + return CommandType::Info; + } else if (commandName == DiffExecutor::COMMAND_NAME) { + return CommandType::Diff; + } else if (commandName == MakedictExecutor::COMMAND_NAME) { + return CommandType::Makedict; + } else if (commandName == HeaderExecutor::COMMAND_NAME) { + return CommandType::Header; + } else if (commandName == HelpExecutor::COMMAND_NAME) { + return CommandType::Help; + } else { + return CommandType::Unknown; + } +} + +/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName, + const std::string &commandName) { + fprintf(stderr, "Command '%s' is unknown. 
Try '%s %s' for more information.\n", + commandName.c_str(), programName.c_str(), HelpExecutor::COMMAND_NAME); +} + +/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor( + const CommandType commandType) { + switch (commandType) { + case CommandType::Info: + return InfoExecutor::run; + case CommandType::Diff: + return DiffExecutor::run; + case CommandType::Makedict: + return MakedictExecutor::run; + case CommandType::Header: + return HeaderExecutor::run; + case CommandType::Help: + return HelpExecutor::run; + default: + return [] (int, char **) -> int { + printf("Command executor not found."); + return 1; + }; + } +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h new file mode 100644 index 000000000..4a181f194 --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H +#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H + +#include <functional> +#include <memory> +#include <string> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +enum class CommandType : int { + Info, + Diff, + Makedict, + Header, + Help, + Unknown +}; + +class CommandUtils { +public: + static CommandType getCommandType(const std::string &commandName); + static void printCommandUnknownMessage(const std::string &programName, + const std::string &commandName); + static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp new file mode 100644 index 000000000..0f349f512 --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "utils/utf8_utils.h" + +#include "utils/char_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4; +const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8}; +const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0}; +const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x03}; +const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; + +const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F; +const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80; +const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6; + +/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) { + std::vector<int> codePoints; + int remainingByteCountForCurrentCodePoint = 0; + int currentCodePointSequenceSize = 0; + int codePoint = 0; + for (const char c : utf8Str) { + if (remainingByteCountForCurrentCodePoint == 0) { + currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(c); + if (currentCodePointSequenceSize <= 0) { + AKLOGE("%x is an invalid utf8 first byte value.", c); + return std::vector<int>(); + } + remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize; + codePoint = maskFirstByte(c, remainingByteCountForCurrentCodePoint); + } else { + codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + codePoint += maskTrailingByte(c); + } + remainingByteCountForCurrentCodePoint--; + if (remainingByteCountForCurrentCodePoint == 0) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) { + AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.", + currentCodePointSequenceSize, codePoint); + return std::vector<int>(); + } + codePoints.push_back(codePoint); + } + } + return codePoints; +} + +/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) { + for 
(size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if ((firstByte & FIRST_BYTE_MARKER_MASKS[i]) == FIRST_BYTE_MARKERS[i]) { + return i; + } + } + // Not a valid utf8 char first byte. + return -1; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte, + const int sequenceSize) { + return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize]; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) { + return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK; +} + +/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) { + std::string utf8String; + for (const int codePoint : codePoints) { + const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint); + if (sequenceSize <= 0) { + AKLOGE("Cannot encode code point (%d).", codePoint); + return std::string(); + } + const int trailingByteCount = sequenceSize - 1; + // Output first byte. + const int value = codePoint >> (trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE); + utf8String.push_back(static_cast<char>(value | FIRST_BYTE_MARKERS[sequenceSize])); + // Output second and later bytes. 
+ for (int i = 1; i < sequenceSize; ++i) { + const int shiftAmount = (trailingByteCount - i) * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + const int value = (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK; + utf8String.push_back(static_cast<char>(value | TRAILING_BYTE_MARKER)); + } + } + return utf8String; +} + +/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) { + if (codePoint < 0) { + return -1; + } + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[i]) { + return i; + } + } + return -1; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h new file mode 100644 index 000000000..35818e56c --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H +#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H + +#include <cstdint> +#include <string> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class Utf8Utils { +public: + static std::vector<int> getCodePoints(const std::string &utf8Str); + static std::string getUtf8String(const CodePointArrayView codePoints); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils); + + // Values indexed by sequence size. + static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; + static const uint8_t FIRST_BYTE_MARKER_MASKS[]; + static const uint8_t FIRST_BYTE_MARKERS[]; + static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[]; + static const int MAX_ENCODED_CODE_POINT_VALUES[]; + + static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK; + static const uint8_t TRAILING_BYTE_MARKER; + static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + + static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte); + static int maskFirstByte(const uint8_t firstByte, const int encodeSize); + static int maskTrailingByte(const uint8_t secondOrLaterByte); + static int getSequenceSizeToEncodeCodePoint(const int codePoint); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H |