diff options
Diffstat (limited to 'native')
27 files changed, 1231 insertions, 11 deletions
diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk index 925ec458b..d2c8c3a2c 100644 --- a/native/dicttoolkit/NativeFileList.mk +++ b/native/dicttoolkit/NativeFileList.mk @@ -22,8 +22,22 @@ LATIN_IME_DICT_TOOLKIT_SRC_FILES := \ help_executor.cpp \ info_executor.cpp \ makedict_executor.cpp) \ - utils/command_utils.cpp + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict.cpp) \ + $(addprefix utils/, \ + arguments_parser.cpp \ + command_utils.cpp \ + utf8_utils.cpp) LATIN_IME_DICT_TOOLKIT_TEST_FILES := \ + $(addprefix command_executors/, \ + diff_executor_test.cpp \ + header_executor_test.cpp \ + info_executor_test.cpp \ + makedict_executor_test.cpp) \ dict_toolkit_defines_test.cpp \ - utils/command_utils_test.cpp + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict_test.cpp) \ + $(addprefix utils/, \ + command_utils_test.cpp \ + utf8_utils_test.cpp) diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp index 077a40090..bf6830686 100644 --- a/native/dicttoolkit/src/command_executors/diff_executor.cpp +++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp @@ -30,8 +30,19 @@ const char *const DiffExecutor::COMMAND_NAME = "diff"; /* static */ void DiffExecutor::printUsage() { printf("*** %s\n", COMMAND_NAME); - printf("Usage: %s\n", COMMAND_NAME); - printf("Shows differences between two dictionaries.\n\n"); + getArgumentsParser().printUsage(COMMAND_NAME, "Shows differences between two dictionaries."); +} + +/* static */ const ArgumentsParser DiffExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("dict1", "dictionary file"), + ArgumentSpec::singleArgument("dict2", "dictionary file") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); } } // namespace dicttoolkit diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h index fc8dc0d8f..f92ae49d5 100644 --- a/native/dicttoolkit/src/command_executors/diff_executor.h +++ b/native/dicttoolkit/src/command_executors/diff_executor.h @@ -18,6 +18,7 @@ #define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H #include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" namespace latinime { namespace dicttoolkit { @@ -28,6 +29,7 @@ class DiffExecutor final { static int run(const int argc, char **argv); static void printUsage(); + static const ArgumentsParser getArgumentsParser(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor); diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp index 068a62c31..b3d273b4e 100644 --- a/native/dicttoolkit/src/command_executors/header_executor.cpp +++ b/native/dicttoolkit/src/command_executors/header_executor.cpp @@ -30,9 +30,19 @@ const char *const HeaderExecutor::COMMAND_NAME = "header"; /* static */ void HeaderExecutor::printUsage() { printf("*** %s\n", COMMAND_NAME); - printf("Usage: %s\n", COMMAND_NAME); - printf("Prints the header contents of a dictionary file.\n\n"); + getArgumentsParser().printUsage(COMMAND_NAME, + "Prints the header contents of a dictionary file."); } +/* static */ const ArgumentsParser HeaderExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("dict", "prints the header contents of a dictionary file") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); +} } // namespace dicttoolkit } // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h index 4cdeb1a99..44cc9cfc4 100644 --- a/native/dicttoolkit/src/command_executors/header_executor.h +++ b/native/dicttoolkit/src/command_executors/header_executor.h @@ -18,6 +18,7 @@ #define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H #include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" namespace latinime { namespace dicttoolkit { @@ -28,6 +29,7 @@ class HeaderExecutor final { static int run(const int argc, char **argv); static void printUsage(); + static const ArgumentsParser getArgumentsParser(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor); diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp index c4d84cab3..351da4aff 100644 --- a/native/dicttoolkit/src/command_executors/info_executor.cpp +++ b/native/dicttoolkit/src/command_executors/info_executor.cpp @@ -17,6 +17,9 @@ #include "command_executors/info_executor.h" #include <cstdio> +#include <string> +#include <unordered_map> +#include <vector> namespace latinime { namespace dicttoolkit { @@ -30,8 +33,21 @@ const char *const InfoExecutor::COMMAND_NAME = "info"; /* static */ void InfoExecutor::printUsage() { printf("*** %s\n", COMMAND_NAME); - printf("Usage: %s\n", COMMAND_NAME); - printf("Prints various information about a dictionary file.\n\n"); + getArgumentsParser().printUsage(COMMAND_NAME, + "Prints various information about a dictionary file."); +} + +/* static */const ArgumentsParser InfoExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("dict", "dictionary file name"), + ArgumentSpec::variableLengthArguments("word", 0 /* minCount */, + ArgumentSpec::UNLIMITED_COUNT, "word to show information") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); } } // namespace dicttoolkit diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h index 4ffa74fb0..d4106d59f 100644 --- a/native/dicttoolkit/src/command_executors/info_executor.h +++ b/native/dicttoolkit/src/command_executors/info_executor.h @@ -18,6 +18,7 @@ #define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H #include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" namespace latinime { namespace dicttoolkit { @@ -28,6 +29,7 @@ class InfoExecutor final { static int run(const int argc, char **argv); static void printUsage(); + static const ArgumentsParser getArgumentsParser(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor); diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp index ea62e3c37..8a84e8069 100644 --- a/native/dicttoolkit/src/command_executors/makedict_executor.cpp +++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp @@ -30,10 +30,25 @@ const char *const MakedictExecutor::COMMAND_NAME = "makedict"; /* static */ void MakedictExecutor::printUsage() { printf("*** %s\n", COMMAND_NAME); - printf("Usage: %s\n", COMMAND_NAME); - printf("Converts a source dictionary file to one or several outputs.\n" + getArgumentsParser().printUsage(COMMAND_NAME, + "Converts a source dictionary file to one or several outputs.\n" "Source can be a binary dictionary file or a combined format file.\n" - "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.\n\n"); + "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported."); +} + +/* static */const ArgumentsParser MakedictExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["o"] = OptionSpec::keyValueOption("format", "2", + "output format version: 2/4/combined"); + optionSpecs["t"] = OptionSpec::keyValueOption("mode", "off", + "code point table switch: on/off/auto"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("src_dict", "source dictionary file"), + ArgumentSpec::singleArgument("dest_dict", "output dictionary file") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); } } // namespace dicttoolkit diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h index ae1309f60..c3de977a3 100644 --- a/native/dicttoolkit/src/command_executors/makedict_executor.h +++ b/native/dicttoolkit/src/command_executors/makedict_executor.h @@ -18,6 +18,7 @@ #define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H #include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" namespace latinime { namespace dicttoolkit { @@ -28,6 +29,7 @@ class MakedictExecutor final { static int run(const int argc, char **argv); static void printUsage(); + static const ArgumentsParser getArgumentsParser(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor); diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp new file mode 100644 index 000000000..af28131cf --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h" + +namespace latinime { +namespace dicttoolkit { + +bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) { + const CodePointArrayView codePoints = wordProperty.getCodePoints(); + if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) { + return false; + } + return addWordInner(codePoints, wordProperty, mRootPtNodeArray); +} + +bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints, + const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { + auto ptNodeList = ptNodeArray.getMutablePtNodeList(); + auto ptNodeIt = ptNodeList->begin(); + for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { + const auto &ptNode = *ptNodeIt; + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[0] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[0] > ptNodeCodePoints[0]) { + break; + } + size_t i = 1; + for (; i < codePoints.size(); ++i) { + if (i >= ptNodeCodePoints.size()) { + // Add new child. + return addWordInner(codePoints.skip(i), wordProperty, + ptNode->getChildrenPtNodeArray()); + } + if (codePoints[i] != ptNodeCodePoints[i]) { + break; + } + } + if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) { + // All code points matched. + if (ptNode->getWordProperty()) { + // Adding the same word multiple times is not supported. + return false; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode)); + ptNodeList->erase(ptNodeIt); + return true; + } + // The (i+1)-th elements are different. + // Create and Add new parent ptNode for the common part. + auto newPtNode = codePoints.size() == i + ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty) + : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i)); + ptNodeList->insert(ptNodeIt, newPtNode); + OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray = + newPtNode->getChildrenPtNodeArray(); + // Add new child for the existing ptNode. + childrenPtNodeArray.getMutablePtNodeList()->push_back( + std::make_shared<OffdeviceIntermediateDictPtNode>( + ptNodeCodePoints.skip(i), *ptNode)); + ptNodeList->erase(ptNodeIt); + if (codePoints.size() != i) { + // Add a child for the new word. + return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray); + } + return true; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)); + return true; +} + +const WordProperty *OffdeviceIntermediateDict::getWordProperty( + const CodePointArrayView codePoints) const { + const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray; + for (size_t i = 0; i < codePoints.size();) { + bool foundNext = false; + for (const auto ptNode : ptNodeArray->getPtNodeList()) { + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[i] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[i] > ptNodeCodePoints[0] + || codePoints.size() < ptNodeCodePoints.size()) { + return nullptr; + } + for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) { + if (codePoints[i + j] != ptNodeCodePoints[j]) { + return nullptr; + } + } + i += ptNodeCodePoints.size(); + if (i == codePoints.size()) { + return ptNode->getWordProperty(); + } + ptNodeArray = &ptNode->getChildrenPtNodeArray(); + foundNext = true; + break; + } + if (!foundNext) { + break; + } + } + return nullptr; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h new file mode 100644 index 000000000..13d26ba91 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +/** + * On memory patricia trie to represent a dictionary. + */ +class OffdeviceIntermediateDict final { + public: + OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header) + : mHeader(header), mRootPtNodeArray() {} + + bool addWord(const WordProperty &wordProperty); + // The returned value will be invalid after modifying the dictionary. e.g. calling addWord(). + const WordProperty *getWordProperty(const CodePointArrayView codePoints) const; + const OffdeviceIntermediateDictHeader &getHeader() const { return mHeader; } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict); + + const OffdeviceIntermediateDictHeader mHeader; + OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray; + + bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty, + OffdeviceIntermediateDictPtNodeArray &ptNodeArray); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h new file mode 100644 index 000000000..440627a79 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H + +#include <map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictHeader final { + public: + using AttributeMap = std::map<std::vector<int>, std::vector<int>>; + + OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap) + : mAttributeMap(attributesMap) {} + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader); + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader); + + const AttributeMap mAttributeMap; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h new file mode 100644 index 000000000..721ccd778 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H + +#include <memory> + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode final { + public: + // Non-terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(nullptr) {} + + // Terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const WordProperty &wordProperty) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(new WordProperty(wordProperty)) {} + + // Replacing PtNodeCodePoints. + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(*ptNode.mWortProperty)) {} + + // Replacing WordProperty. + OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNode.mPtNodeCodePoints), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(wordProperty)) {} + + const WordProperty *getWordProperty() const { + return mWortProperty.get(); + } + + const CodePointArrayView getPtNodeCodePoints() const { + return CodePointArrayView(mPtNodeCodePoints); + } + + OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() { + return mChildrenPtNodeArray; + } + + private: + DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode); + + const std::vector<int> mPtNodeCodePoints; + OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray; + const std::unique_ptr<WordProperty> mWortProperty; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h new file mode 100644 index 000000000..f87456ce0 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H + +#include <list> +#include <memory> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode; + +class OffdeviceIntermediateDictPtNodeArray final { + public: + const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const { + return mPtNodes; + } + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() { + return &mPtNodes; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray); + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H diff --git a/native/dicttoolkit/src/utils/arguments_and_options.h b/native/dicttoolkit/src/utils/arguments_and_options.h new file mode 100644 index 000000000..d8f5985e5 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_and_options.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class ArgumentsAndOptions { + public: + ArgumentsAndOptions() : mIsValid(false), mOptions(), mArguments() {} + + ArgumentsAndOptions(std::unordered_map<std::string, std::string> &&options, + std::unordered_map<std::string, std::vector<std::string>> &&arguments) + : mIsValid(true), mOptions(std::move(options)), mArguments(std::move(arguments)) {} + + bool isValid() const { + return mIsValid; + } + + bool hasOption(const std::string &optionName) const { + return mOptions.find(optionName) != mOptions.end(); + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsAndOptions); + + const bool mIsValid; + const std::unordered_map<std::string, std::string> mOptions; + const std::unordered_map<std::string, std::vector<std::string>> mArguments; +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H diff --git a/native/dicttoolkit/src/utils/arguments_parser.cpp b/native/dicttoolkit/src/utils/arguments_parser.cpp new file mode 100644 index 000000000..039dae35b --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +const int ArgumentSpec::UNLIMITED_COUNT = -1; + +bool ArgumentsParser::validateSpecs() const { + for (size_t i = 0; i < mArgumentSpecs.size() ; ++i) { + if (mArgumentSpecs[i].getMinCount() != mArgumentSpecs[i].getMaxCount() + && i != mArgumentSpecs.size() - 1) { + AKLOGE("Variable length argument must be at the end."); + return false; + } + } + return true; +} + +void ArgumentsParser::printUsage(const std::string &commandName, + const std::string &description) const { + printf("Usage: %s", commandName.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" [-%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("]"); + } + for (const auto &argSpec : mArgumentSpecs) { + if (argSpec.getMinCount() == 0 && argSpec.getMaxCount() == 1) { + printf(" [<%s>]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1 && argSpec.getMaxCount() == 1) { + printf(" <%s>", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 0) { + printf(" [<%s>...]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1) { + printf(" <%s>...", argSpec.getName().c_str()); + } + } + printf("\n%s\n\n", description.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" -%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("\t\t\t%s", spec.getDescription().c_str()); + if (spec.takeValue() && !spec.getDefaultValue().empty()) { + printf("\tdefault: %s", spec.getDefaultValue().c_str()); + } + printf("\n"); + } + for (const auto &argSpec : mArgumentSpecs) { + printf(" <%s>\t\t\t%s\n", argSpec.getName().c_str(), argSpec.getDescription().c_str()); + } + printf("\n\n"); +} + +const ArgumentsAndOptions ArgumentsParser::parseArguments(const int argc, char **argv) const { + // TODO: Implement + return ArgumentsAndOptions(); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/arguments_parser.h b/native/dicttoolkit/src/utils/arguments_parser.h new file mode 100644 index 000000000..be2dd8749 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/arguments_and_options.h" + +namespace latinime { +namespace dicttoolkit { + +class OptionSpec { + public: + // Default constructor and assignment operator is enabled to be used with std::unordered_map. + OptionSpec() = default; + OptionSpec &operator=(const OptionSpec &) = default; + + static OptionSpec keyValueOption(const std::string &valueName, const std::string &defaultValue, + const std::string &description) { + return OptionSpec(true /* takeValue */, valueName, defaultValue, description); + } + + static OptionSpec switchOption(const std::string &description) { + return OptionSpec(false /* takeValue */, "" /* valueName */, "" /* defaultValue */, + description); + } + + bool takeValue() const { return mTakeValue; } + const std::string &getValueName() const { return mValueName; } + const std::string &getDefaultValue() const { return mDefaultValue; } + const std::string &getDescription() const { return mDescription; } + + private: + OptionSpec(const bool takeValue, const std::string &valueName, const std::string &defaultValue, + const std::string &description) + : mTakeValue(takeValue), mValueName(valueName), mDefaultValue(defaultValue), + mDescription(description) {} + + // Whether the option have to be used with a value or just a switch. + // e.g. 'f' in "command -f /path/to/file" is mTakeValue == true. + // 'f' in "command -f -t" is mTakeValue == false. + bool mTakeValue; + // Name of the value used to show usage. + std::string mValueName; + std::string mDefaultValue; + std::string mDescription; +}; + +class ArgumentSpec { + public: + static const int UNLIMITED_COUNT; + + static ArgumentSpec singleArgument(const std::string &name, const std::string &description) { + return ArgumentSpec(name, 1 /* minCount */, 1 /* maxCount */, description); + } + + static ArgumentSpec variableLengthArguments(const std::string &name, const int minCount, + const int maxCount, const std::string &description) { + return ArgumentSpec(name, minCount, maxCount, description); + } + + const std::string &getName() const { return mName; } + int getMinCount() const { return mMinCount; } + int getMaxCount() const { return mMaxCount; } + const std::string &getDescription() const { return mDescription; } + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentSpec); + + ArgumentSpec(const std::string &name, const int minCount, const int maxCount, + const std::string &description) + : mName(name), mMinCount(minCount), mMaxCount(maxCount), mDescription(description) {} + + const std::string mName; + const int mMinCount; + const int mMaxCount; + const std::string mDescription; +}; + +class ArgumentsParser { + public: + ArgumentsParser(std::unordered_map<std::string, OptionSpec> &&optionSpecs, + std::vector<ArgumentSpec> &&argumentSpecs) + : mOptionSpecs(std::move(optionSpecs)), mArgumentSpecs(std::move(argumentSpecs)) {} + + const ArgumentsAndOptions parseArguments(const int argc, char **argv) const; + bool validateSpecs() const; + void printUsage(const std::string &commandName, const std::string &description) const; + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentsParser); + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsParser); + + const std::unordered_map<std::string, OptionSpec> mOptionSpecs; + const std::vector<ArgumentSpec> mArgumentSpecs; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp new file mode 100644 index 000000000..0f349f512 --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include "utils/char_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4; +const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8}; +const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0}; +const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x03}; +const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; + +const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F; +const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80; +const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6; + +/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) { + std::vector<int> codePoints; + int remainingByteCountForCurrentCodePoint = 0; + int currentCodePointSequenceSize = 0; + int codePoint = 0; + for (const char c : utf8Str) { + if (remainingByteCountForCurrentCodePoint == 0) { + currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(c); + if (currentCodePointSequenceSize <= 0) { + AKLOGE("%x is an invalid utf8 first byte value.", c); + return std::vector<int>(); + } + remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize; + codePoint = maskFirstByte(c, remainingByteCountForCurrentCodePoint); + } else { + codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + codePoint += maskTrailingByte(c); + } + remainingByteCountForCurrentCodePoint--; + if (remainingByteCountForCurrentCodePoint == 0) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) { + AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.", + currentCodePointSequenceSize, codePoint); + return std::vector<int>(); + } + codePoints.push_back(codePoint); + } + } + return codePoints; +} + +/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) { + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if ((firstByte & FIRST_BYTE_MARKER_MASKS[i]) == FIRST_BYTE_MARKERS[i]) { + return i; + } + } + // Not a valid utf8 char first byte. + return -1; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte, + const int sequenceSize) { + return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize]; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) { + return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK; +} + +/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) { + std::string utf8String; + for (const int codePoint : codePoints) { + const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint); + if (sequenceSize <= 0) { + AKLOGE("Cannot encode code point (%d).", codePoint); + return std::string(); + } + const int trailingByteCount = sequenceSize - 1; + // Output first byte. + const int value = codePoint >> (trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE); + utf8String.push_back(static_cast<char>(value | FIRST_BYTE_MARKERS[sequenceSize])); + // Output second and later bytes. + for (int i = 1; i < sequenceSize; ++i) { + const int shiftAmount = (trailingByteCount - i) * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + const int value = (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK; + utf8String.push_back(static_cast<char>(value | TRAILING_BYTE_MARKER)); + } + } + return utf8String; +} + +/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) { + if (codePoint < 0) { + return -1; + } + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[i]) { + return i; + } + } + return -1; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h new file mode 100644 index 000000000..35818e56c --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H +#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H + +#include <cstdint> +#include <string> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class Utf8Utils { +public: + static std::vector<int> getCodePoints(const std::string &utf8Str); + static std::string getUtf8String(const CodePointArrayView codePoints); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils); + + // Values indexed by sequence size. + static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; + static const uint8_t FIRST_BYTE_MARKER_MASKS[]; + static const uint8_t FIRST_BYTE_MARKERS[]; + static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[]; + static const int MAX_ENCODED_CODE_POINT_VALUES[]; + + static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK; + static const uint8_t TRAILING_BYTE_MARKER; + static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + + static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte); + static int maskFirstByte(const uint8_t firstByte, const int encodeSize); + static int maskTrailingByte(const uint8_t secondOrLaterByte); + static int getSequenceSizeToEncodeCodePoint(const int codePoint); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H diff --git a/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp b/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp new file mode 100644 index 000000000..444141427 --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/diff_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(DiffExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(DiffExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/command_executors/header_executor_test.cpp b/native/dicttoolkit/tests/command_executors/header_executor_test.cpp new file mode 100644 index 000000000..a94150b01 --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/header_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/header_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(HeaderExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(HeaderExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/command_executors/info_executor_test.cpp b/native/dicttoolkit/tests/command_executors/info_executor_test.cpp new file mode 100644 index 000000000..debe8c601 --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/info_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/info_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(InfoExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(InfoExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp b/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp new file mode 100644 index 000000000..44eb3dc1b --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/makedict_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(MakedictExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(MakedictExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp new file mode 100644 index 000000000..f2e24ab5f --- /dev/null +++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +const std::vector<int> getCodePointVector(const char *str) { + std::vector<int> codePoints; + while (*str) { + codePoints.push_back(*str); + ++str; + } + return codePoints; +} + +const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) { + return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>()); +} + +TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) { + OffdeviceIntermediateDict dict = OffdeviceIntermediateDict( + OffdeviceIntermediateDictHeader(OffdeviceIntermediateDictHeader::AttributeMap())); + EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView())); + + const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_TRUE(dict.addWord(wordProperty0)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + + const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh")); + EXPECT_TRUE(dict.addWord(wordProperty1)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + + const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab")); + EXPECT_TRUE(dict.addWord(wordProperty2)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + + const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg")); + EXPECT_TRUE(dict.addWord(wordProperty3)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + + const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef")); + EXPECT_TRUE(dict.addWord(wordProperty4)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + + const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef")); + EXPECT_TRUE(dict.addWord(wordProperty5)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); + + const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail."; + + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/utf8_utils_test.cpp b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp new file mode 100644 index 000000000..9c59a8b05 --- /dev/null +++ b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(Utf8UtilsTests, TestGetCodePoints) { + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(""); + EXPECT_EQ(0u, codePoints.size()); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints("test"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ('t', codePoints[0]); + EXPECT_EQ('e', codePoints[1]); + EXPECT_EQ('s', codePoints[2]); + EXPECT_EQ('t', codePoints[3]); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A + EXPECT_EQ('a', codePoints[1]); + EXPECT_EQ(0x03C2, codePoints[2]); // CYRILLIC CAPITAL LETTER A + EXPECT_EQ(0x0410, codePoints[3]); // GREEK SMALL LETTER FINAL SIGMA + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752"); + EXPECT_EQ(3u, codePoints.size()); + EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE + EXPECT_EQ('?', codePoints[1]); + EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT + } + + // Redundant UTF-8 sequences must be rejected. + EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty()); +} + +TEST(Utf8UtilsTests, TestGetUtf8String) { + { + const std::vector<int> codePoints = {'t', 'e', 's', 't'}; + EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } + { + const std::vector<int> codePoints = { + 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */, + 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */, + 0x0430 /* CYRILLIC SMALL LETTER A */, + 0x3042 /* HIRAGANA LETTER A */, + 0x1F36A /* COOKIE */, + 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */ + }; + EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752", + Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index 408373176..e0f671056 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -133,6 +133,29 @@ class IntArrayView { return std::vector<int>(begin(), end()); } + std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const { + if (limit <= 0) { + return std::vector<IntArrayView>(); + } + std::vector<IntArrayView> result; + if (limit == 1) { + result.emplace_back(mPtr, mSize); + return result; + } + size_t startIndex = 0; + for (size_t i = 0; i < mSize; ++i) { + if (mPtr[i] == separator) { + result.emplace_back(mPtr + startIndex, i - startIndex); + startIndex = i + 1; + if (result.size() >= static_cast<size_t>(limit - 1)) { + break; + } + } + } + result.emplace_back(mPtr + startIndex, mSize - startIndex); + return result; + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp index 4757a416b..2fce633f5 100644 --- a/native/jni/tests/utils/int_array_view_test.cpp +++ b/native/jni/tests/utils/int_array_view_test.cpp @@ -151,5 +151,52 @@ TEST(IntArrayViewTest, TestToVector) { EXPECT_EQ(std::vector<int>(), CodePointArrayView().toVector()); } +TEST(IntArrayViewTest, TestSplit) { + EXPECT_TRUE(IntArrayView().split(0, 0).empty()); + { + const auto intArrayViews = IntArrayView().split(0, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + { + const auto intArrayViews = IntArrayView().split(0, 100); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + + const std::vector<int> intVector = {1, 2, 3, 3, 2, 3}; + const IntArrayView intArrayView(intVector); + { + const auto intArrayViews = intArrayView.split(2); + EXPECT_EQ(3u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3}), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({3}), intArrayViews[2].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 2); + EXPECT_EQ(2u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3, 2, 3}), intArrayViews[1].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_EQ(intVector, intArrayViews[0].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 0); + EXPECT_EQ(0u, intArrayViews.size()); + } + { + const auto intArrayViews = intArrayView.split(3); + EXPECT_EQ(4u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1, 2}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({2}), intArrayViews[2].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[3].toVector()); + } +} + } // namespace } // namespace latinime |