aboutsummaryrefslogtreecommitdiffstats
path: root/native/dicttoolkit/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/dicttoolkit/src')
-rw-r--r--native/dicttoolkit/src/command_executors/diff_executor.cpp49
-rw-r--r--native/dicttoolkit/src/command_executors/diff_executor.h40
-rw-r--r--native/dicttoolkit/src/command_executors/header_executor.cpp48
-rw-r--r--native/dicttoolkit/src/command_executors/header_executor.h40
-rw-r--r--native/dicttoolkit/src/command_executors/help_executor.cpp52
-rw-r--r--native/dicttoolkit/src/command_executors/help_executor.h38
-rw-r--r--native/dicttoolkit/src/command_executors/info_executor.cpp54
-rw-r--r--native/dicttoolkit/src/command_executors/info_executor.h40
-rw-r--r--native/dicttoolkit/src/command_executors/makedict_executor.cpp61
-rw-r--r--native/dicttoolkit/src/command_executors/makedict_executor.h40
-rw-r--r--native/dicttoolkit/src/dict_toolkit_defines.h24
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp126
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h54
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h44
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h79
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h48
-rw-r--r--native/dicttoolkit/src/utils/arguments_and_options.h77
-rw-r--r--native/dicttoolkit/src/utils/arguments_parser.cpp165
-rw-r--r--native/dicttoolkit/src/utils/arguments_parser.h119
-rw-r--r--native/dicttoolkit/src/utils/command_utils.cpp74
-rw-r--r--native/dicttoolkit/src/utils/command_utils.h50
-rw-r--r--native/dicttoolkit/src/utils/utf8_utils.cpp119
-rw-r--r--native/dicttoolkit/src/utils/utf8_utils.h56
23 files changed, 1497 insertions, 0 deletions
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp
new file mode 100644
index 000000000..bf6830686
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/diff_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Command name used in this executor's usage text and status messages.
+const char *const DiffExecutor::COMMAND_NAME = "diff";
+
+/**
+ * Entry point of the "diff" command. The command is still a stub: it only reports on stderr
+ * that it has not been implemented and does not look at its arguments.
+ *
+ * @param argc number of entries in argv (currently unused)
+ * @param argv command line arguments (currently unused)
+ * @return always 0
+ */
+/* static */ int DiffExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** diff" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a one-line description of the command.
+ */
+/* static */ void DiffExecutor::printUsage() {
+    const char *const description = "Shows differences between two dictionaries.";
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, description);
+}
+
+/**
+ * Builds the parser describing the command line of the "diff" command: a "p" switch option
+ * and two single arguments, "dict1" and "dict2".
+ *
+ * @return a newly constructed ArgumentsParser
+ */
+/* static */ const ArgumentsParser DiffExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately non-const: std::move() on a const vector yields a const rvalue, which can
+    // only be copied, never moved from.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict1", "dictionary file"),
+        ArgumentSpec::singleArgument("dict2", "dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h
new file mode 100644
index 000000000..f92ae49d5
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the "diff" command. Every member is static.
+ */
+class DiffExecutor final {
+ public:
+    // Name of the command handled by this executor.
+    static const char *const COMMAND_NAME;
+
+    // Builds the parser describing the options and arguments of the command.
+    static const ArgumentsParser getArgumentsParser();
+    // Prints the usage of the command.
+    static void printUsage();
+    // Runs the command with the given command line and returns its result code.
+    static int run(const int argc, char **argv);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp
new file mode 100644
index 000000000..b3d273b4e
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/header_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Command name used in this executor's usage text and status messages.
+const char *const HeaderExecutor::COMMAND_NAME = "header";
+
+/**
+ * Entry point of the "header" command. The command is still a stub: it only reports on stderr
+ * that it has not been implemented and does not look at its arguments.
+ *
+ * @param argc number of entries in argv (currently unused)
+ * @param argv command line arguments (currently unused)
+ * @return always 0
+ */
+/* static */ int HeaderExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** header" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a one-line description of the command.
+ */
+/* static */ void HeaderExecutor::printUsage() {
+    const char *const description = "Prints the header contents of a dictionary file.";
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, description);
+}
+
+/**
+ * Builds the parser describing the command line of the "header" command: a "p" switch option
+ * and a single "dict" argument.
+ *
+ * @return a newly constructed ArgumentsParser
+ */
+/* static */ const ArgumentsParser HeaderExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately non-const: std::move() on a const vector yields a const rvalue, which can
+    // only be copied, never moved from.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict", "prints the header contents of a dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h
new file mode 100644
index 000000000..44cc9cfc4
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the "header" command. Every member is static.
+ */
+class HeaderExecutor final {
+ public:
+    // Name of the command handled by this executor.
+    static const char *const COMMAND_NAME;
+
+    // Builds the parser describing the options and arguments of the command.
+    static const ArgumentsParser getArgumentsParser();
+    // Prints the usage of the command.
+    static void printUsage();
+    // Runs the command with the given command line and returns its result code.
+    static int run(const int argc, char **argv);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp
new file mode 100644
index 000000000..bd29a5b16
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/help_executor.h"
+
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+#include "utils/command_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Command name printed in this executor's usage text.
+const char *const HelpExecutor::COMMAND_NAME = "help";
+
+/**
+ * Prints the list of available commands to stdout by calling the printUsage() of every
+ * executor in turn: diff, header, info, makedict and finally help itself.
+ *
+ * @param argc number of entries in argv (unused)
+ * @param argv command line arguments (unused)
+ * @return always 0
+ */
+/* static */ int HelpExecutor::run(const int argc, char **argv) {
+    using PrintUsageMethod = void (*)();
+    // The order of this table is the order in which the commands are listed.
+    const PrintUsageMethod printUsageMethods[] = {
+        DiffExecutor::printUsage,
+        HeaderExecutor::printUsage,
+        InfoExecutor::printUsage,
+        MakedictExecutor::printUsage,
+        printUsage
+    };
+
+    std::printf("Available commands:\n\n");
+    for (const PrintUsageMethod printUsageMethod : printUsageMethods) {
+        printUsageMethod();
+    }
+    return 0;
+}
+
+/**
+ * Prints the banner, the usage line and the description of the "help" command to stdout.
+ */
+/* static */ void HelpExecutor::printUsage() {
+    std::printf("*** %s\n", COMMAND_NAME);
+    std::printf("Usage: %s\n", COMMAND_NAME);
+    // Constant text without conversions, so it can be written out as is.
+    std::fputs("Show this help list.\n\n", stdout);
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h
new file mode 100644
index 000000000..280610eb9
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the "help" command. Every member is static.
+ */
+class HelpExecutor final {
+ public:
+    // Name of the command handled by this executor.
+    static const char *const COMMAND_NAME;
+
+    // Prints the usage of the command.
+    static void printUsage();
+    // Runs the command with the given command line and returns its result code.
+    static int run(const int argc, char **argv);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp
new file mode 100644
index 000000000..351da4aff
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/info_executor.h"
+
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Command name used in this executor's usage text and status messages.
+const char *const InfoExecutor::COMMAND_NAME = "info";
+
+/**
+ * Entry point of the "info" command. The command is still a stub: it only reports on stderr
+ * that it has not been implemented and does not look at its arguments.
+ *
+ * @param argc number of entries in argv (currently unused)
+ * @param argv command line arguments (currently unused)
+ * @return always 0
+ */
+/* static */ int InfoExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** info" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a one-line description of the command.
+ */
+/* static */ void InfoExecutor::printUsage() {
+    const char *const description = "Prints various information about a dictionary file.";
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, description);
+}
+
+/**
+ * Builds the parser describing the command line of the "info" command: a "p" switch option,
+ * a single "dict" argument and a variable-length "word" argument list that may be empty.
+ *
+ * @return a newly constructed ArgumentsParser
+ */
+/* static */ const ArgumentsParser InfoExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately non-const: std::move() on a const vector yields a const rvalue, which can
+    // only be copied, never moved from.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict", "dictionary file name"),
+        ArgumentSpec::variableLengthArguments("word", 0 /* minCount */,
+                ArgumentSpec::UNLIMITED_COUNT, "word to show information")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h
new file mode 100644
index 000000000..d4106d59f
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the "info" command. Every member is static.
+ */
+class InfoExecutor final {
+ public:
+    // Name of the command handled by this executor.
+    static const char *const COMMAND_NAME;
+
+    // Builds the parser describing the options and arguments of the command.
+    static const ArgumentsParser getArgumentsParser();
+    // Prints the usage of the command.
+    static void printUsage();
+    // Runs the command with the given command line and returns its result code.
+    static int run(const int argc, char **argv);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
new file mode 100644
index 000000000..4b0a5aeea
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/makedict_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Command name used in this executor's usage text and status messages.
+const char *const MakedictExecutor::COMMAND_NAME = "makedict";
+
+/**
+ * Entry point of the "makedict" command. The command line is validated with the command's
+ * ArgumentsParser; the conversion itself is still a stub that only reports on stderr that it
+ * has not been implemented.
+ *
+ * @param argc number of entries in argv
+ * @param argv command line arguments
+ * @return 1 after printing the usage when the arguments are invalid, 0 otherwise
+ */
+/* static */ int MakedictExecutor::run(const int argc, char **argv) {
+    const ArgumentsParser parser = getArgumentsParser();
+    const ArgumentsAndOptions parsedCommandLine =
+            parser.parseArguments(argc, argv, true /* printErrorMessages */);
+    if (parsedCommandLine.isValid()) {
+        std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+        return 0;
+    }
+    printUsage();
+    return 1;
+}
+
+/**
+ * Prints a "*** makedict" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a multi-line description of the command.
+ */
+/* static */ void MakedictExecutor::printUsage() {
+    const char *const description =
+            "Converts a source dictionary file to one or several outputs.\n"
+            "Source can be a binary dictionary file or a combined format file.\n"
+            "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.";
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, description);
+}
+
+/**
+ * Builds the parser describing the command line of the "makedict" command: the key-value
+ * options "o" (output format, default "2") and "t" (code point table mode, default "off"),
+ * followed by the single arguments "src_dict" and "dest_dict".
+ *
+ * @return a newly constructed ArgumentsParser
+ */
+/* static */ const ArgumentsParser MakedictExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["o"] = OptionSpec::keyValueOption("format", "2",
+            "output format version: 2/4/combined");
+    optionSpecs["t"] = OptionSpec::keyValueOption("mode", "off",
+            "code point table switch: on/off/auto");
+
+    // Deliberately non-const: std::move() on a const vector yields a const rvalue, which can
+    // only be copied, never moved from.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("src_dict", "source dictionary file"),
+        ArgumentSpec::singleArgument("dest_dict", "output dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h
new file mode 100644
index 000000000..c3de977a3
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the "makedict" command. Every member is static.
+ */
+class MakedictExecutor final {
+ public:
+    // Name of the command handled by this executor.
+    static const char *const COMMAND_NAME;
+
+    // Builds the parser describing the options and arguments of the command.
+    static const ArgumentsParser getArgumentsParser();
+    // Prints the usage of the command.
+    static void printUsage();
+    // Runs the command with the given command line and returns its result code.
+    static int run(const int argc, char **argv);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h
new file mode 100644
index 000000000..dbaae0ca0
--- /dev/null
+++ b/native/dicttoolkit/src/dict_toolkit_defines.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DEFINES_H
+#define LATINIME_DICT_TOOLKIT_DEFINES_H
+
+#include "defines.h"
+
+// NOTE(review): presumably the smallest argc the tool accepts (program name plus command
+// name); the use of this macro is not visible from this header - confirm against the caller.
+#define MIN_ARG_COUNT 2
+
+#endif // LATINIME_DICT_TOOLKIT_DEFINES_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
new file mode 100644
index 000000000..af28131cf
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Adds the word described by wordProperty to the dictionary.
+ *
+ * @param wordProperty the word to add; its code points identify the word
+ * @return false when the word has no code points, is longer than MAX_WORD_LENGTH, or could not
+ *         be inserted into the trie; true otherwise
+ */
+bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
+    const CodePointArrayView wordCodePoints = wordProperty.getCodePoints();
+    const bool hasSupportedLength =
+            !wordCodePoints.empty() && wordCodePoints.size() <= MAX_WORD_LENGTH;
+    if (!hasSupportedLength) {
+        return false;
+    }
+    return addWordInner(wordCodePoints, wordProperty, mRootPtNodeArray);
+}
+
+// Inserts the word described by wordProperty into the subtree rooted at ptNodeArray.
+// codePoints is the part of the word that still has to be placed below ptNodeArray.
+// The scan below treats the list as ordered by descending first code point: PtNodes with a
+// larger first code point are skipped and a new PtNode is inserted in front of the first PtNode
+// with a smaller one.
+// Returns false when the word is already in the dictionary, true once it has been added.
+bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
+ const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
+ auto ptNodeList = ptNodeArray.getMutablePtNodeList();
+ auto ptNodeIt = ptNodeList->begin();
+ for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
+ const auto &ptNode = *ptNodeIt;
+ const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+ if (codePoints[0] < ptNodeCodePoints[0]) {
+ // This PtNode is ordered before the new word; keep scanning.
+ continue;
+ }
+ if (codePoints[0] > ptNodeCodePoints[0]) {
+ // No PtNode shares the first code point; insert in front of ptNodeIt after the loop.
+ break;
+ }
+ // The first code points are equal; i becomes the length of the common prefix.
+ size_t i = 1;
+ for (; i < codePoints.size(); ++i) {
+ if (i >= ptNodeCodePoints.size()) {
+ // Add new child.
+ return addWordInner(codePoints.skip(i), wordProperty,
+ ptNode->getChildrenPtNodeArray());
+ }
+ if (codePoints[i] != ptNodeCodePoints[i]) {
+ break;
+ }
+ }
+ if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
+ // All code points matched.
+ if (ptNode->getWordProperty()) {
+ // Adding the same word multiple times is not supported.
+ return false;
+ }
+ // The members of a PtNode are const, so the PtNode is replaced by an updated copy
+ // inserted at the same position instead of being modified in place.
+ ptNodeList->insert(ptNodeIt,
+ std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
+ ptNodeList->erase(ptNodeIt);
+ return true;
+ }
+ // The (i+1)-th elements are different.
+ // Create and Add new parent ptNode for the common part.
+ // When the whole new word is the common part, the parent is the terminal for the new word.
+ auto newPtNode = codePoints.size() == i
+ ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
+ : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
+ ptNodeList->insert(ptNodeIt, newPtNode);
+ OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
+ newPtNode->getChildrenPtNodeArray();
+ // Add new child for the existing ptNode.
+ childrenPtNodeArray.getMutablePtNodeList()->push_back(
+ std::make_shared<OffdeviceIntermediateDictPtNode>(
+ ptNodeCodePoints.skip(i), *ptNode));
+ // ptNode and ptNodeCodePoints refer to the erased element and must not be used below.
+ ptNodeList->erase(ptNodeIt);
+ if (codePoints.size() != i) {
+ // Add a child for the new word.
+ return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
+ }
+ return true;
+ }
+ // No PtNode starts with the same code point: add a new terminal PtNode for the whole rest of
+ // the word at the position where the scan stopped.
+ ptNodeList->insert(ptNodeIt,
+ std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
+ return true;
+}
+
+/**
+ * Looks up a word in the trie.
+ *
+ * Walks down from the root PtNode array, consuming the code points of one PtNode per level.
+ * Within a level, PtNodes whose first code point is larger than the next code point of the
+ * word are skipped; the first other PtNode either matches completely or ends the search.
+ *
+ * @param codePoints the code points of the word to look up
+ * @return the WordProperty stored in the PtNode where the word ends, or nullptr when the word
+ *         is empty, is not in the trie, or ends at a PtNode without a WordProperty. The
+ *         pointer refers to data held by the PtNode.
+ */
+const WordProperty *OffdeviceIntermediateDict::getWordProperty(
+        const CodePointArrayView codePoints) const {
+    const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
+    for (size_t i = 0; i < codePoints.size();) {
+        bool foundNext = false;
+        // Bind by reference to avoid a shared_ptr copy per visited PtNode.
+        for (const auto &ptNode : ptNodeArray->getPtNodeList()) {
+            const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+            if (codePoints[i] < ptNodeCodePoints[0]) {
+                continue;
+            }
+            // Only the code points from index i on are still unmatched, so the PtNode has to
+            // fit into the remaining length. Comparing against the full length of the word
+            // would let the loop below read past the end of codePoints on deeper levels.
+            // i < codePoints.size() holds here, so the subtraction cannot wrap around.
+            if (codePoints[i] > ptNodeCodePoints[0]
+                    || codePoints.size() - i < ptNodeCodePoints.size()) {
+                return nullptr;
+            }
+            for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
+                if (codePoints[i + j] != ptNodeCodePoints[j]) {
+                    return nullptr;
+                }
+            }
+            i += ptNodeCodePoints.size();
+            if (i == codePoints.size()) {
+                return ptNode->getWordProperty();
+            }
+            // The word continues; descend into the children of the matched PtNode.
+            ptNodeArray = &ptNode->getChildrenPtNodeArray();
+            foundNext = true;
+            break;
+        }
+        if (!foundNext) {
+            break;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
new file mode 100644
index 000000000..13d26ba91
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * In-memory patricia trie to represent a dictionary.
+ *
+ * Holds a copy of the header it was created with and the root PtNode array of the trie.
+ */
+class OffdeviceIntermediateDict final {
+ public:
+    OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header)
+            : mHeader(header), mRootPtNodeArray() {}
+
+    // Returns the header copied at construction time.
+    const OffdeviceIntermediateDictHeader &getHeader() const {
+        return mHeader;
+    }
+
+    // Adds a word to the dictionary. Returns whether the word was added.
+    bool addWord(const WordProperty &wordProperty);
+
+    // Returns the property of the given word or nullptr when there is none.
+    // The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
+    const WordProperty *getWordProperty(const CodePointArrayView codePoints) const;
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
+
+    const OffdeviceIntermediateDictHeader mHeader;
+    OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
+
+    // Recursive part of addWord(): inserts the given code points below ptNodeArray.
+    bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
+            OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
new file mode 100644
index 000000000..440627a79
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+
+#include <map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Header of an OffdeviceIntermediateDict. Stores an immutable copy of the attribute map it is
+ * constructed from.
+ */
+class OffdeviceIntermediateDictHeader final {
+ public:
+    // Maps an attribute key to an attribute value; both are stored as vectors of int.
+    using AttributeMap = std::map<std::vector<int>, std::vector<int>>;
+
+    // Copies attributesMap into the header.
+    OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap)
+            : mAttributeMap(attributesMap) {}
+
+ private:
+    DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader);
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader);
+
+    const AttributeMap mAttributeMap;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
new file mode 100644
index 000000000..721ccd778
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * A PtNode of the off-device intermediate patricia trie.
+ *
+ * A PtNode stores its own code points, the PtNode array of its children and, when a word ends
+ * at this PtNode (terminal), a copy of the WordProperty of that word. The code points and the
+ * word property cannot be changed after construction; to change one of them, create a
+ * replacement PtNode with one of the "Replacing" constructors.
+ */
+class OffdeviceIntermediateDictPtNode final {
+ public:
+    // Non-terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(nullptr) {}
+
+    // Terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const WordProperty &wordProperty)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Replacing PtNodeCodePoints.
+    // The children array and the word property are copied from ptNode. ptNode may be
+    // non-terminal, in which case the new PtNode is non-terminal as well; its (null) word
+    // property must not be dereferenced.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(ptNode.mWordProperty
+                      ? new WordProperty(*ptNode.mWordProperty) : nullptr) {}
+
+    // Replacing WordProperty.
+    // The code points and the children array are copied from ptNode.
+    OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Returns the property of the word ending at this PtNode, or nullptr for a non-terminal
+    // PtNode. The pointee is owned by this PtNode.
+    const WordProperty *getWordProperty() const {
+        return mWordProperty.get();
+    }
+
+    // Returns a view of the code points stored in this PtNode.
+    const CodePointArrayView getPtNodeCodePoints() const {
+        return CodePointArrayView(mPtNodeCodePoints);
+    }
+
+    // Returns the mutable PtNode array holding the children of this PtNode.
+    OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
+        return mChildrenPtNodeArray;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
+
+    const std::vector<int> mPtNodeCodePoints;
+    OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
+    // Null for a non-terminal PtNode.
+    const std::unique_ptr<WordProperty> mWordProperty;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
new file mode 100644
index 000000000..f87456ce0
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+
+#include <list>
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode;
+
+// Holds a list of shared pointers to PtNodes.
+class OffdeviceIntermediateDictPtNodeArray final {
+ public:
+    // Read-only view of the PtNode list.
+    auto getPtNodeList() const
+            -> const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> & {
+        return mPtNodes;
+    }
+
+    // Pointer to the PtNode list for callers that need to modify it in place.
+    auto getMutablePtNodeList()
+            -> std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> * {
+        return &mPtNodes;
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
+
+    std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
diff --git a/native/dicttoolkit/src/utils/arguments_and_options.h b/native/dicttoolkit/src/utils/arguments_and_options.h
new file mode 100644
index 000000000..2d81b1ecb
--- /dev/null
+++ b/native/dicttoolkit/src/utils/arguments_and_options.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H
+#define LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Result of parsing a command line: option name -> value, and argument name -> values.
+class ArgumentsAndOptions {
+ public:
+    // Builds an invalid, empty result.
+    ArgumentsAndOptions() : mIsValid(false), mOptions(), mArguments() {}
+
+    // Builds a valid result that takes over the given option and argument maps.
+    ArgumentsAndOptions(std::unordered_map<std::string, std::string> &&options,
+            std::unordered_map<std::string, std::vector<std::string>> &&arguments)
+            : mIsValid(true), mOptions(std::move(options)), mArguments(std::move(arguments)) {}
+
+    bool isValid() const { return mIsValid; }
+
+    // True when an entry for optionName exists.
+    bool hasOption(const std::string &optionName) const {
+        return mOptions.count(optionName) > 0;
+    }
+
+    // Value stored for optionName. The option must exist (checked with ASSERT).
+    const std::string &getOptionValue(const std::string &optionName) const {
+        const auto optionIt = mOptions.find(optionName);
+        ASSERT(optionIt != mOptions.end());
+        return optionIt->second;
+    }
+
+    // True when at least one value was stored under the argument name.
+    bool hasArgument(const std::string &name) const {
+        const auto argumentIt = mArguments.find(name);
+        if (argumentIt == mArguments.end()) {
+            return false;
+        }
+        return !argumentIt->second.empty();
+    }
+
+    // First value stored under name. At least one value must exist (checked with ASSERT).
+    const std::string &getSingleArgument(const std::string &name) const {
+        const auto argumentIt = mArguments.find(name);
+        ASSERT(argumentIt != mArguments.end() && !argumentIt->second.empty());
+        return argumentIt->second.front();
+    }
+
+    // All values stored under name. An entry for name must exist (checked with ASSERT).
+    const std::vector<std::string> &getVariableLengthArguments(const std::string &name) const {
+        const auto argumentIt = mArguments.find(name);
+        ASSERT(argumentIt != mArguments.end());
+        return argumentIt->second;
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsAndOptions);
+
+    const bool mIsValid;
+    const std::unordered_map<std::string, std::string> mOptions;
+    const std::unordered_map<std::string, std::vector<std::string>> mArguments;
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H
diff --git a/native/dicttoolkit/src/utils/arguments_parser.cpp b/native/dicttoolkit/src/utils/arguments_parser.cpp
new file mode 100644
index 000000000..1451284f1
--- /dev/null
+++ b/native/dicttoolkit/src/utils/arguments_parser.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/arguments_parser.h"
+
+#include <unordered_set>
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Max-count value for an argument with no upper limit on the number of values. S_INT_MAX is
+// defined outside this file.
+const size_t ArgumentSpec::UNLIMITED_COUNT = S_INT_MAX;
+
+// Checks the argument specs for internal consistency. Returns false (after logging the
+// reason) when a spec allows no values at all, when a variable-length spec is not the last
+// one, or when two specs share a name; returns true otherwise.
+bool ArgumentsParser::validateSpecs() const {
+    std::unordered_set<std::string> argumentNameSet;
+    for (size_t i = 0; i < mArgumentSpecs.size(); ++i) {
+        const ArgumentSpec &spec = mArgumentSpecs[i];
+        if (spec.getMinCount() == 0 && spec.getMaxCount() == 0) {
+            AKLOGE("minCount = maxCount = 0 for %s.", spec.getName().c_str());
+            return false;
+        }
+        // Only the final spec may accept a varying number of values; otherwise the parser
+        // could not tell where one argument ends and the next begins.
+        if (spec.getMinCount() != spec.getMaxCount() && i != mArgumentSpecs.size() - 1) {
+            AKLOGE("Variable length argument \"%s\" must be at the end.",
+                    spec.getName().c_str());
+            return false;
+        }
+        // insert() reports through .second whether the name was new.
+        if (!argumentNameSet.insert(spec.getName()).second) {
+            AKLOGE("Multiple arguments have the same name \"%s\".", spec.getName().c_str());
+            return false;
+        }
+    }
+    return true;
+}
+
+// Prints the usage text to stdout: a synopsis line, the description, then one line per
+// option and one line per argument.
+void ArgumentsParser::printUsage(const std::string &commandName,
+        const std::string &description) const {
+    // Synopsis: command name, every option in brackets, then the positional arguments.
+    printf("Usage: %s", commandName.c_str());
+    for (const auto &option : mOptionSpecs) {
+        const std::string &optionName = option.first;
+        const OptionSpec &spec = option.second;
+        printf(" [-%s", optionName.c_str());
+        if (spec.needsValue()) {
+            printf(" <%s>", spec.getValueName().c_str());
+        }
+        printf("]");
+    }
+    for (const auto &argSpec : mArgumentSpecs) {
+        if (argSpec.getMinCount() == 0 && argSpec.getMaxCount() == 1) {
+            printf(" [<%s>]", argSpec.getName().c_str());
+        } else if (argSpec.getMinCount() == 1 && argSpec.getMaxCount() == 1) {
+            printf(" <%s>", argSpec.getName().c_str());
+        } else if (argSpec.getMinCount() == 0) {
+            printf(" [<%s>...]", argSpec.getName().c_str());
+        } else {
+            // At least one value is required. This also covers minCount > 1, which would
+            // otherwise be left out of the synopsis entirely.
+            printf(" <%s>...", argSpec.getName().c_str());
+        }
+    }
+    printf("\n%s\n\n", description.c_str());
+    // Detailed listing of the options.
+    for (const auto &option : mOptionSpecs) {
+        const std::string &optionName = option.first;
+        const OptionSpec &spec = option.second;
+        printf(" -%s", optionName.c_str());
+        if (spec.needsValue()) {
+            printf(" <%s>", spec.getValueName().c_str());
+        }
+        printf("\t\t\t%s", spec.getDescription().c_str());
+        if (spec.needsValue() && !spec.getDefaultValue().empty()) {
+            printf("\tdefault: %s", spec.getDefaultValue().c_str());
+        }
+        printf("\n");
+    }
+    // Detailed listing of the positional arguments.
+    for (const auto &argSpec : mArgumentSpecs) {
+        printf(" <%s>\t\t\t%s\n", argSpec.getName().c_str(), argSpec.getDescription().c_str());
+    }
+    printf("\n\n");
+}
+
+// Parses argv[1..argc) against the option and argument specs.
+// A token longer than one character that starts with '-' is an option; every other token is
+// assigned to the current argument spec. Returns an invalid ArgumentsAndOptions on any
+// error; the reason is written to stderr only when printErrorMessage is true.
+const ArgumentsAndOptions ArgumentsParser::parseArguments(const int argc, char **argv,
+        const bool printErrorMessage) const {
+    if (argc <= 0) {
+        AKLOGE("Invalid argc (%d).", argc);
+        ASSERT(false);
+        return ArgumentsAndOptions();
+    }
+    std::unordered_map<std::string, std::string> options;
+    for (const auto &entry : mOptionSpecs) {
+        const std::string &optionName = entry.first;
+        const OptionSpec &optionSpec = entry.second;
+        if (optionSpec.needsValue() && !optionSpec.getDefaultValue().empty()) {
+            // Set default value.
+            options[optionName] = optionSpec.getDefaultValue();
+        }
+    }
+    std::unordered_map<std::string, std::vector<std::string>> arguments;
+    auto argumentSpecIt = mArgumentSpecs.cbegin();
+    for (int i = 1; i < argc; ++i) {
+        const std::string arg = argv[i];
+        if (arg.length() > 1 && arg[0] == '-') {
+            // option
+            const std::string optionName = arg.substr(1);
+            const auto it = mOptionSpecs.find(optionName);
+            if (it == mOptionSpecs.end()) {
+                if (printErrorMessage) {
+                    fprintf(stderr, "Unknown option: '%s'\n", optionName.c_str());
+                }
+                return ArgumentsAndOptions();
+            }
+            std::string optionValue;
+            if (it->second.needsValue()) {
+                // The value is the next token; consume it here.
+                ++i;
+                if (i >= argc) {
+                    if (printErrorMessage) {
+                        fprintf(stderr, "Missing argument for option '%s'\n",
+                                optionName.c_str());
+                    }
+                    return ArgumentsAndOptions();
+                }
+                optionValue = argv[i];
+            }
+            options[optionName] = optionValue;
+        } else {
+            // argument
+            if (argumentSpecIt == mArgumentSpecs.end()) {
+                if (printErrorMessage) {
+                    fprintf(stderr, "Too many arguments.\n");
+                }
+                return ArgumentsAndOptions();
+            }
+            // Look the value list up once; move to the next spec when this one is full.
+            std::vector<std::string> &values = arguments[argumentSpecIt->getName()];
+            values.push_back(arg);
+            if (values.size() >= argumentSpecIt->getMaxCount()) {
+                ++argumentSpecIt;
+            }
+        }
+    }
+
+    // A spec that never filled up must still have received its minimum number of values.
+    if (argumentSpecIt != mArgumentSpecs.end()) {
+        const auto it = arguments.find(argumentSpecIt->getName());
+        const size_t minCount = argumentSpecIt->getMinCount();
+        const size_t actualCount = it == arguments.end() ? 0 : it->second.size();
+        if (minCount > actualCount) {
+            if (printErrorMessage) {
+                // %zu is the conversion for size_t.
+                fprintf(stderr, "Not enough arguments. %zu argument(s) required for <%s>\n",
+                        minCount, argumentSpecIt->getName().c_str());
+            }
+            return ArgumentsAndOptions();
+        }
+    }
+    return ArgumentsAndOptions(std::move(options), std::move(arguments));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/arguments_parser.h b/native/dicttoolkit/src/utils/arguments_parser.h
new file mode 100644
index 000000000..32bd328d4
--- /dev/null
+++ b/native/dicttoolkit/src/utils/arguments_parser.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H
+#define LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_and_options.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Describes one command line option: whether it takes a value, the value's display name,
+// its default value and a description used in the usage text.
+class OptionSpec {
+ public:
+    // Default constructor and assignment operator are enabled to be used with
+    // std::unordered_map.
+    OptionSpec() = default;
+    OptionSpec &operator=(const OptionSpec &) = default;
+
+    // Creates a spec for an option that must be followed by a value.
+    static OptionSpec keyValueOption(const std::string &valueName, const std::string &defaultValue,
+            const std::string &description) {
+        return OptionSpec(true /* needsValue */, valueName, defaultValue, description);
+    }
+
+    // Creates a spec for a switch, i.e. an option that takes no value.
+    static OptionSpec switchOption(const std::string &description) {
+        return OptionSpec(false /* needsValue */, "" /* valueName */, "" /* defaultValue */,
+                description);
+    }
+
+    bool needsValue() const { return mNeedsValue; }
+    const std::string &getValueName() const { return mValueName; }
+    const std::string &getDefaultValue() const { return mDefaultValue; }
+    const std::string &getDescription() const { return mDescription; }
+
+ private:
+    OptionSpec(const bool needsValue, const std::string &valueName, const std::string &defaultValue,
+            const std::string &description)
+            : mNeedsValue(needsValue), mValueName(valueName), mDefaultValue(defaultValue),
+              mDescription(description) {}
+
+    // Whether the option has to be used with a value or is just a switch.
+    // e.g. 'f' in "command -f /path/to/file" is mNeedsValue == true.
+    // 'f' in "command -f -t" is mNeedsValue == false.
+    // Initialized here so that a default-constructed spec never holds an indeterminate bool.
+    bool mNeedsValue = false;
+    // Name of the value used to show usage.
+    std::string mValueName;
+    std::string mDefaultValue;
+    std::string mDescription;
+};
+
+// Describes one positional argument: its name, how many values it accepts (minimum and
+// maximum) and a description used in the usage text.
+class ArgumentSpec {
+ public:
+    static const size_t UNLIMITED_COUNT;
+
+    // Spec for an argument that takes exactly one value.
+    static ArgumentSpec singleArgument(const std::string &name, const std::string &description) {
+        const size_t exactlyOne = 1;
+        return ArgumentSpec(name, exactlyOne /* minCount */, exactlyOne /* maxCount */,
+                description);
+    }
+
+    // Spec for an argument that takes between minCount and maxCount values.
+    static ArgumentSpec variableLengthArguments(const std::string &name, const size_t minCount,
+            const size_t maxCount, const std::string &description) {
+        return ArgumentSpec(name, minCount, maxCount, description);
+    }
+
+    const std::string &getName() const {
+        return mName;
+    }
+
+    size_t getMinCount() const {
+        return mMinCount;
+    }
+
+    size_t getMaxCount() const {
+        return mMaxCount;
+    }
+
+    const std::string &getDescription() const {
+        return mDescription;
+    }
+
+ private:
+    DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentSpec);
+
+    ArgumentSpec(const std::string &name, const size_t minCount, const size_t maxCount,
+            const std::string &description)
+            : mName(name), mMinCount(minCount), mMaxCount(maxCount), mDescription(description) {}
+
+    const std::string mName;
+    const size_t mMinCount;
+    const size_t mMaxCount;
+    const std::string mDescription;
+};
+
+// Parses command line tokens against a fixed set of option specs (keyed by option name) and
+// an ordered list of argument specs, and can print usage text built from those specs.
+class ArgumentsParser {
+ public:
+ // Stores the given specs in this parser.
+ // NOTE(review): the parameters are const rvalue references, so std::move() yields a const
+ // rvalue and the members are copy-constructed rather than moved.
+ ArgumentsParser(const std::unordered_map<std::string, OptionSpec> &&optionSpecs,
+ const std::vector<ArgumentSpec> &&argumentSpecs)
+ : mOptionSpecs(std::move(optionSpecs)), mArgumentSpecs(std::move(argumentSpecs)) {}
+
+ // Parses argv[1..argc); returns an invalid ArgumentsAndOptions on error. Error messages
+ // go to stderr only when printErrorMessage is true.
+ const ArgumentsAndOptions parseArguments(const int argc, char **argv,
+ const bool printErrorMessage) const;
+ // Returns whether the argument specs are consistent with each other.
+ bool validateSpecs() const;
+ // Prints the usage text for commandName, including description, to stdout.
+ void printUsage(const std::string &commandName, const std::string &description) const;
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentsParser);
+ DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsParser);
+
+ // Option specs keyed by option name (the text after the leading '-').
+ const std::unordered_map<std::string, OptionSpec> mOptionSpecs;
+ // Positional argument specs, in the order they are consumed.
+ const std::vector<ArgumentSpec> mArgumentSpecs;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H
diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp
new file mode 100644
index 000000000..34196425e
--- /dev/null
+++ b/native/dicttoolkit/src/utils/command_utils.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/command_utils.h"
+
+#include <cstdio>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/help_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Maps a command name to its CommandType by comparing it with each executor's
+// COMMAND_NAME; names that match none of them map to CommandType::Unknown.
+/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) {
+    if (commandName == InfoExecutor::COMMAND_NAME) {
+        return CommandType::Info;
+    }
+    if (commandName == DiffExecutor::COMMAND_NAME) {
+        return CommandType::Diff;
+    }
+    if (commandName == MakedictExecutor::COMMAND_NAME) {
+        return CommandType::Makedict;
+    }
+    if (commandName == HeaderExecutor::COMMAND_NAME) {
+        return CommandType::Header;
+    }
+    if (commandName == HelpExecutor::COMMAND_NAME) {
+        return CommandType::Help;
+    }
+    return CommandType::Unknown;
+}
+
+// Writes an "unknown command" message to stderr that names the rejected command and points
+// the user at "<programName> <help command name>".
+// NOTE(review): HelpExecutor::COMMAND_NAME is passed to %s, so it is presumably a C string;
+// confirm against its declaration.
+/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName,
+ const std::string &commandName) {
+ fprintf(stderr, "Command '%s' is unknown. Try '%s %s' for more information.\n",
+ commandName.c_str(), programName.c_str(), HelpExecutor::COMMAND_NAME);
+}
+
+// Returns the run function of the executor that handles commandType. Any other value
+// (including CommandType::Unknown) yields a callable that prints a message and returns 1.
+/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor(
+        const CommandType commandType) {
+    if (commandType == CommandType::Info) {
+        return InfoExecutor::run;
+    }
+    if (commandType == CommandType::Diff) {
+        return DiffExecutor::run;
+    }
+    if (commandType == CommandType::Makedict) {
+        return MakedictExecutor::run;
+    }
+    if (commandType == CommandType::Header) {
+        return HeaderExecutor::run;
+    }
+    if (commandType == CommandType::Help) {
+        return HelpExecutor::run;
+    }
+    // No executor is registered for this type.
+    return [] (int, char **) -> int {
+        printf("Command executor not found.");
+        return 1;
+    };
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h
new file mode 100644
index 000000000..4a181f194
--- /dev/null
+++ b/native/dicttoolkit/src/utils/command_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
+#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Commands known to the toolkit. CommandUtils::getCommandType() produces these from a
+// command name; Unknown is the result for a name that matches no executor.
+enum class CommandType : int {
+ Info,
+ Diff,
+ Makedict,
+ Header,
+ Help,
+ Unknown
+};
+
+// Static helpers for mapping command names to command types and executors.
+class CommandUtils {
+public:
+ // Returns the CommandType for commandName, or CommandType::Unknown when nothing matches.
+ static CommandType getCommandType(const std::string &commandName);
+ // Reports to stderr that commandName is not a known command of programName.
+ static void printCommandUnknownMessage(const std::string &programName,
+ const std::string &commandName);
+ // Returns a callable taking (int, char **) and returning int that runs the command of
+ // the given type.
+ static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType);
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils);
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp
new file mode 100644
index 000000000..0f349f512
--- /dev/null
+++ b/native/dicttoolkit/src/utils/utf8_utils.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8_utils.h"
+
+#include "utils/char_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// The tables below are indexed by UTF-8 sequence size (1 to 4); index 0 is a placeholder.
+const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4;
+// (firstByte & FIRST_BYTE_MARKER_MASKS[n]) == FIRST_BYTE_MARKERS[n] identifies the first
+// byte of an n-byte sequence: 0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx.
+const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8};
+const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0};
+// Payload bits of the first byte. A 4-byte lead byte (11110xxx) carries three payload bits,
+// so its mask is 0x07; with 0x03 the top bit of U+100000..U+10FFFF would be dropped.
+const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x07};
+// Largest code point encodable with n bytes; -1 at index 0 so that every 1-byte value
+// passes the "needs at least n bytes" check.
+const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+
+// Trailing bytes have the form 10xxxxxx: six payload bits under the 0x80 marker.
+const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F;
+const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80;
+const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6;
+
+// Decodes a UTF-8 string into code points.
+// Returns an empty vector (after logging the reason) when the input is not well formed:
+// an invalid first byte, a trailing byte that lacks the 10xxxxxx marker, a redundant
+// (overlong) encoding, a value above the maximum for its sequence size, or a sequence cut
+// off by the end of the string.
+/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) {
+    // The bits of a trailing byte that are not payload; they must equal TRAILING_BYTE_MARKER.
+    const uint8_t trailingByteMarkerMask =
+            static_cast<uint8_t>(~TRAILING_BYTE_CODE_POINT_BITS_MASK);
+    std::vector<int> codePoints;
+    int remainingByteCountForCurrentCodePoint = 0;
+    int currentCodePointSequenceSize = 0;
+    int codePoint = 0;
+    for (const char c : utf8Str) {
+        // Work on the unsigned value so that bytes >= 0x80 are not sign-extended.
+        const uint8_t byte = static_cast<uint8_t>(c);
+        if (remainingByteCountForCurrentCodePoint == 0) {
+            currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(byte);
+            if (currentCodePointSequenceSize <= 0) {
+                AKLOGE("%x is an invalid utf8 first byte value.", byte);
+                return std::vector<int>();
+            }
+            remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize;
+            codePoint = maskFirstByte(byte, remainingByteCountForCurrentCodePoint);
+        } else {
+            if ((byte & trailingByteMarkerMask) != TRAILING_BYTE_MARKER) {
+                AKLOGE("%x is an invalid utf8 trailing byte value.", byte);
+                return std::vector<int>();
+            }
+            codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+            codePoint += maskTrailingByte(byte);
+        }
+        remainingByteCountForCurrentCodePoint--;
+        if (remainingByteCountForCurrentCodePoint == 0) {
+            // A value that fits in a shorter sequence must not use a longer one.
+            if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) {
+                AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.",
+                        currentCodePointSequenceSize, codePoint);
+                return std::vector<int>();
+            }
+            if (codePoint > MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize]) {
+                AKLOGE("codePoint(%x) is larger than the maximum code point value.", codePoint);
+                return std::vector<int>();
+            }
+            codePoints.push_back(codePoint);
+        }
+    }
+    if (remainingByteCountForCurrentCodePoint != 0) {
+        AKLOGE("The UTF-8 string ends in the middle of a %d bytes sequence.",
+                currentCodePointSequenceSize);
+        return std::vector<int>();
+    }
+    return codePoints;
+}
+
+// Returns the sequence size (1 to MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) announced by the
+// given first byte, or -1 when the byte matches no first-byte marker.
+/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) {
+    size_t sequenceSize = 1;
+    while (sequenceSize <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) {
+        const uint8_t markerBits = firstByte & FIRST_BYTE_MARKER_MASKS[sequenceSize];
+        if (markerBits == FIRST_BYTE_MARKERS[sequenceSize]) {
+            return sequenceSize;
+        }
+        ++sequenceSize;
+    }
+    // Not a valid utf8 char first byte.
+    return -1;
+}
+
+// Returns the bits of firstByte selected by the first-byte payload mask for a sequence of
+// sequenceSize bytes. sequenceSize is used directly as an index into the mask table.
+/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte,
+ const int sequenceSize) {
+ return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize];
+}
+
+// Returns the payload bits of a second or later byte, i.e. the byte masked with
+// TRAILING_BYTE_CODE_POINT_BITS_MASK. The marker bits are not checked here.
+/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) {
+ return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK;
+}
+
+// Encodes the given code points as a UTF-8 string. Returns an empty string (after logging)
+// as soon as one code point cannot be encoded.
+/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) {
+    std::string encoded;
+    for (const int codePoint : codePoints) {
+        const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint);
+        if (sequenceSize <= 0) {
+            AKLOGE("Cannot encode code point (%d).", codePoint);
+            return std::string();
+        }
+        const int trailingByteCount = sequenceSize - 1;
+        // First byte: the bits above all trailing bytes, combined with the size marker.
+        const int firstByteShift = trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+        const int firstByteBits = codePoint >> firstByteShift;
+        encoded.push_back(static_cast<char>(firstByteBits | FIRST_BYTE_MARKERS[sequenceSize]));
+        // Trailing bytes, from the most significant group of bits down to the least.
+        for (int remaining = trailingByteCount - 1; remaining >= 0; --remaining) {
+            const int shiftAmount = remaining * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+            const int trailingBits =
+                    (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK;
+            encoded.push_back(static_cast<char>(trailingBits | TRAILING_BYTE_MARKER));
+        }
+    }
+    return encoded;
+}
+
+// Returns the smallest sequence size whose maximum encodable value is >= codePoint, or -1
+// when codePoint is negative or exceeds the largest entry of the table.
+/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) {
+    if (codePoint >= 0) {
+        size_t sequenceSize = 1;
+        while (sequenceSize <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) {
+            if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[sequenceSize]) {
+                return sequenceSize;
+            }
+            ++sequenceSize;
+        }
+    }
+    return -1;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h
new file mode 100644
index 000000000..35818e56c
--- /dev/null
+++ b/native/dicttoolkit/src/utils/utf8_utils.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
+#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static helpers converting between UTF-8 encoded strings and code point sequences.
+class Utf8Utils {
+public:
+ // Decodes utf8Str into code points; the implementation returns an empty vector on
+ // input it rejects.
+ static std::vector<int> getCodePoints(const std::string &utf8Str);
+ // Encodes codePoints as UTF-8; the implementation returns an empty string when a code
+ // point cannot be encoded.
+ static std::string getUtf8String(const CodePointArrayView codePoints);
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils);
+
+ // Values indexed by sequence size.
+ // (MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT is the largest valid index into the arrays below.)
+ static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT;
+ static const uint8_t FIRST_BYTE_MARKER_MASKS[];
+ static const uint8_t FIRST_BYTE_MARKERS[];
+ static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[];
+ static const int MAX_ENCODED_CODE_POINT_VALUES[];
+
+ // Constants describing the second and later bytes of a sequence.
+ static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK;
+ static const uint8_t TRAILING_BYTE_MARKER;
+ static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+
+ // Sequence size announced by a first byte, or a negative value if it is not a first byte.
+ static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte);
+ // Payload bits of the first byte of a sequence of the given size.
+ static int maskFirstByte(const uint8_t firstByte, const int encodeSize);
+ // Payload bits of a second or later byte.
+ static int maskTrailingByte(const uint8_t secondOrLaterByte);
+ // Number of bytes needed to encode codePoint, or a negative value if it is not encodable.
+ static int getSequenceSizeToEncodeCodePoint(const int codePoint);
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H