diff options
Diffstat (limited to 'native/dicttoolkit/src/utils')
-rw-r--r-- | native/dicttoolkit/src/utils/arguments_and_options.h | 54 | ||||
-rw-r--r-- | native/dicttoolkit/src/utils/arguments_parser.cpp | 84 | ||||
-rw-r--r-- | native/dicttoolkit/src/utils/arguments_parser.h | 118 | ||||
-rw-r--r-- | native/dicttoolkit/src/utils/command_utils.cpp | 74 | ||||
-rw-r--r-- | native/dicttoolkit/src/utils/command_utils.h | 50 | ||||
-rw-r--r-- | native/dicttoolkit/src/utils/utf8_utils.cpp | 119 | ||||
-rw-r--r-- | native/dicttoolkit/src/utils/utf8_utils.h | 56 |
7 files changed, 555 insertions, 0 deletions
diff --git a/native/dicttoolkit/src/utils/arguments_and_options.h b/native/dicttoolkit/src/utils/arguments_and_options.h new file mode 100644 index 000000000..d8f5985e5 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_and_options.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class ArgumentsAndOptions { + public: + ArgumentsAndOptions() : mIsValid(false), mOptions(), mArguments() {} + + ArgumentsAndOptions(std::unordered_map<std::string, std::string> &&options, + std::unordered_map<std::string, std::vector<std::string>> &&arguments) + : mIsValid(true), mOptions(std::move(options)), mArguments(std::move(arguments)) {} + + bool isValid() const { + return mIsValid; + } + + bool hasOption(const std::string &optionName) const { + return mOptions.find(optionName) != mOptions.end(); + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsAndOptions); + + const bool mIsValid; + const std::unordered_map<std::string, std::string> mOptions; + const std::unordered_map<std::string, std::vector<std::string>> mArguments; +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H diff --git a/native/dicttoolkit/src/utils/arguments_parser.cpp b/native/dicttoolkit/src/utils/arguments_parser.cpp new file mode 100644 index 000000000..039dae35b --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +const int ArgumentSpec::UNLIMITED_COUNT = -1; + +bool ArgumentsParser::validateSpecs() const { + for (size_t i = 0; i < mArgumentSpecs.size() ; ++i) { + if (mArgumentSpecs[i].getMinCount() != mArgumentSpecs[i].getMaxCount() + && i != mArgumentSpecs.size() - 1) { + AKLOGE("Variable length argument must be at the end."); + return false; + } + } + return true; +} + +void ArgumentsParser::printUsage(const std::string &commandName, + const std::string &description) const { + printf("Usage: %s", commandName.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" [-%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("]"); + } + for (const auto &argSpec : mArgumentSpecs) { + if (argSpec.getMinCount() == 0 && argSpec.getMaxCount() == 1) { + printf(" [<%s>]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1 && argSpec.getMaxCount() == 1) { + printf(" <%s>", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 0) { + printf(" [<%s>...]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1) { + printf(" <%s>...", argSpec.getName().c_str()); + } + } + printf("\n%s\n\n", description.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" -%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("\t\t\t%s", spec.getDescription().c_str()); + if (spec.takeValue() && !spec.getDefaultValue().empty()) { + printf("\tdefault: %s", spec.getDefaultValue().c_str()); + } + printf("\n"); + } + for (const auto &argSpec : mArgumentSpecs) { + printf(" <%s>\t\t\t%s\n", argSpec.getName().c_str(), argSpec.getDescription().c_str()); + } + printf("\n\n"); +} + +const ArgumentsAndOptions ArgumentsParser::parseArguments(const int argc, char **argv) const { + // TODO: Implement + return ArgumentsAndOptions(); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/arguments_parser.h b/native/dicttoolkit/src/utils/arguments_parser.h new file mode 100644 index 000000000..be2dd8749 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/arguments_and_options.h" + +namespace latinime { +namespace dicttoolkit { + +class OptionSpec { + public: + // Default constructor and assignment operator is enabled to be used with std::unordered_map. + OptionSpec() = default; + OptionSpec &operator=(const OptionSpec &) = default; + + static OptionSpec keyValueOption(const std::string &valueName, const std::string &defaultValue, + const std::string &description) { + return OptionSpec(true /* takeValue */, valueName, defaultValue, description); + } + + static OptionSpec switchOption(const std::string &description) { + return OptionSpec(false /* takeValue */, "" /* valueName */, "" /* defaultValue */, + description); + } + + bool takeValue() const { return mTakeValue; } + const std::string &getValueName() const { return mValueName; } + const std::string &getDefaultValue() const { return mDefaultValue; } + const std::string &getDescription() const { return mDescription; } + + private: + OptionSpec(const bool takeValue, const std::string &valueName, const std::string &defaultValue, + const std::string &description) + : mTakeValue(takeValue), mValueName(valueName), mDefaultValue(defaultValue), + mDescription(description) {} + + // Whether the option have to be used with a value or just a switch. + // e.g. 'f' in "command -f /path/to/file" is mTakeValue == true. + // 'f' in "command -f -t" is mTakeValue == false. + bool mTakeValue; + // Name of the value used to show usage. + std::string mValueName; + std::string mDefaultValue; + std::string mDescription; +}; + +class ArgumentSpec { + public: + static const int UNLIMITED_COUNT; + + static ArgumentSpec singleArgument(const std::string &name, const std::string &description) { + return ArgumentSpec(name, 1 /* minCount */, 1 /* maxCount */, description); + } + + static ArgumentSpec variableLengthArguments(const std::string &name, const int minCount, + const int maxCount, const std::string &description) { + return ArgumentSpec(name, minCount, maxCount, description); + } + + const std::string &getName() const { return mName; } + int getMinCount() const { return mMinCount; } + int getMaxCount() const { return mMaxCount; } + const std::string &getDescription() const { return mDescription; } + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentSpec); + + ArgumentSpec(const std::string &name, const int minCount, const int maxCount, + const std::string &description) + : mName(name), mMinCount(minCount), mMaxCount(maxCount), mDescription(description) {} + + const std::string mName; + const int mMinCount; + const int mMaxCount; + const std::string mDescription; +}; + +class ArgumentsParser { + public: + ArgumentsParser(std::unordered_map<std::string, OptionSpec> &&optionSpecs, + std::vector<ArgumentSpec> &&argumentSpecs) + : mOptionSpecs(std::move(optionSpecs)), mArgumentSpecs(std::move(argumentSpecs)) {} + + const ArgumentsAndOptions parseArguments(const int argc, char **argv) const; + bool validateSpecs() const; + void printUsage(const std::string &commandName, const std::string &description) const; + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentsParser); + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsParser); + + const std::unordered_map<std::string, OptionSpec> mOptionSpecs; + const std::vector<ArgumentSpec> mArgumentSpecs; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp new file mode 100644 index 000000000..34196425e --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <cstdio> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/help_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" + +namespace latinime { +namespace dicttoolkit { + +/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) { + if (commandName == InfoExecutor::COMMAND_NAME) { + return CommandType::Info; + } else if (commandName == DiffExecutor::COMMAND_NAME) { + return CommandType::Diff; + } else if (commandName == MakedictExecutor::COMMAND_NAME) { + return CommandType::Makedict; + } else if (commandName == HeaderExecutor::COMMAND_NAME) { + return CommandType::Header; + } else if (commandName == HelpExecutor::COMMAND_NAME) { + return CommandType::Help; + } else { + return CommandType::Unknown; + } +} + +/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName, + const std::string &commandName) { + fprintf(stderr, "Command '%s' is unknown. Try '%s %s' for more information.\n", + commandName.c_str(), programName.c_str(), HelpExecutor::COMMAND_NAME); +} + +/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor( + const CommandType commandType) { + switch (commandType) { + case CommandType::Info: + return InfoExecutor::run; + case CommandType::Diff: + return DiffExecutor::run; + case CommandType::Makedict: + return MakedictExecutor::run; + case CommandType::Header: + return HeaderExecutor::run; + case CommandType::Help: + return HelpExecutor::run; + default: + return [] (int, char **) -> int { + printf("Command executor not found."); + return 1; + }; + } +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h new file mode 100644 index 000000000..4a181f194 --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H +#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H + +#include <functional> +#include <memory> +#include <string> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +enum class CommandType : int { + Info, + Diff, + Makedict, + Header, + Help, + Unknown +}; + +class CommandUtils { +public: + static CommandType getCommandType(const std::string &commandName); + static void printCommandUnknownMessage(const std::string &programName, + const std::string &commandName); + static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp new file mode 100644 index 000000000..0f349f512 --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include "utils/char_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4; +const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8}; +const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0}; +const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x03}; +const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; + +const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F; +const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80; +const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6; + +/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) { + std::vector<int> codePoints; + int remainingByteCountForCurrentCodePoint = 0; + int currentCodePointSequenceSize = 0; + int codePoint = 0; + for (const char c : utf8Str) { + if (remainingByteCountForCurrentCodePoint == 0) { + currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(c); + if (currentCodePointSequenceSize <= 0) { + AKLOGE("%x is an invalid utf8 first byte value.", c); + return std::vector<int>(); + } + remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize; + codePoint = maskFirstByte(c, remainingByteCountForCurrentCodePoint); + } else { + codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + codePoint += maskTrailingByte(c); + } + remainingByteCountForCurrentCodePoint--; + if (remainingByteCountForCurrentCodePoint == 0) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) { + AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.", + currentCodePointSequenceSize, codePoint); + return std::vector<int>(); + } + codePoints.push_back(codePoint); + } + } + return codePoints; +} + +/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) { + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if ((firstByte & FIRST_BYTE_MARKER_MASKS[i]) == FIRST_BYTE_MARKERS[i]) { + return i; + } + } + // Not a valid utf8 char first byte. + return -1; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte, + const int sequenceSize) { + return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize]; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) { + return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK; +} + +/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) { + std::string utf8String; + for (const int codePoint : codePoints) { + const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint); + if (sequenceSize <= 0) { + AKLOGE("Cannot encode code point (%d).", codePoint); + return std::string(); + } + const int trailingByteCount = sequenceSize - 1; + // Output first byte. + const int value = codePoint >> (trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE); + utf8String.push_back(static_cast<char>(value | FIRST_BYTE_MARKERS[sequenceSize])); + // Output second and later bytes. + for (int i = 1; i < sequenceSize; ++i) { + const int shiftAmount = (trailingByteCount - i) * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + const int value = (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK; + utf8String.push_back(static_cast<char>(value | TRAILING_BYTE_MARKER)); + } + } + return utf8String; +} + +/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) { + if (codePoint < 0) { + return -1; + } + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[i]) { + return i; + } + } + return -1; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h new file mode 100644 index 000000000..35818e56c --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H +#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H + +#include <cstdint> +#include <string> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class Utf8Utils { +public: + static std::vector<int> getCodePoints(const std::string &utf8Str); + static std::string getUtf8String(const CodePointArrayView codePoints); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils); + + // Values indexed by sequence size. + static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; + static const uint8_t FIRST_BYTE_MARKER_MASKS[]; + static const uint8_t FIRST_BYTE_MARKERS[]; + static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[]; + static const int MAX_ENCODED_CODE_POINT_VALUES[]; + + static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK; + static const uint8_t TRAILING_BYTE_MARKER; + static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + + static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte); + static int maskFirstByte(const uint8_t firstByte, const int encodeSize); + static int maskTrailingByte(const uint8_t secondOrLaterByte); + static int getSequenceSizeToEncodeCodePoint(const int codePoint); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H |