aboutsummaryrefslogtreecommitdiffstats
path: root/native/dicttoolkit/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/dicttoolkit/src')
-rw-r--r--native/dicttoolkit/src/command_executors/diff_executor.cpp38
-rw-r--r--native/dicttoolkit/src/command_executors/diff_executor.h38
-rw-r--r--native/dicttoolkit/src/command_executors/header_executor.cpp38
-rw-r--r--native/dicttoolkit/src/command_executors/header_executor.h38
-rw-r--r--native/dicttoolkit/src/command_executors/help_executor.cpp52
-rw-r--r--native/dicttoolkit/src/command_executors/help_executor.h38
-rw-r--r--native/dicttoolkit/src/command_executors/info_executor.cpp38
-rw-r--r--native/dicttoolkit/src/command_executors/info_executor.h38
-rw-r--r--native/dicttoolkit/src/command_executors/makedict_executor.cpp40
-rw-r--r--native/dicttoolkit/src/command_executors/makedict_executor.h38
-rw-r--r--native/dicttoolkit/src/dict_toolkit_defines.h2
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp126
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h54
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h44
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h79
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h48
-rw-r--r--native/dicttoolkit/src/utils/command_utils.cpp74
-rw-r--r--native/dicttoolkit/src/utils/command_utils.h50
-rw-r--r--native/dicttoolkit/src/utils/utf8_utils.cpp119
-rw-r--r--native/dicttoolkit/src/utils/utf8_utils.h56
20 files changed, 1048 insertions, 0 deletions
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp
new file mode 100644
index 000000000..077a40090
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/diff_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const DiffExecutor::COMMAND_NAME = "diff";
+
+/**
+ * Entry point of the 'diff' command.
+ *
+ * Placeholder implementation: both arguments are ignored, a "not implemented" notice is written
+ * to stderr and 0 is returned.
+ */
+/* static */ int DiffExecutor::run(const int argc, char **argv) {
+    // The arguments are intentionally unused until the command is implemented.
+    (void)argc;
+    (void)argv;
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints the help entry of the 'diff' command to stdout: a "***" title line, a usage line and a
+ * one-sentence description followed by an empty line.
+ */
+/* static */ void DiffExecutor::printUsage() {
+    // Single formatted write; the command name appears in both the title and the usage line.
+    std::printf("*** %s\n"
+            "Usage: %s\n"
+            "Shows differences between two dictionaries.\n\n",
+            COMMAND_NAME, COMMAND_NAME);
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h
new file mode 100644
index 000000000..fc8dc0d8f
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the 'diff' command. All members are static.
+ */
+class DiffExecutor final {
+ public:
+    // Command name as typed on the command line.
+    static const char *const COMMAND_NAME;
+
+    // Runs the command with the given command line arguments and returns its exit code.
+    static int run(int argc, char **argv);
+    // Prints the help entry of this command to stdout.
+    static void printUsage();
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp
new file mode 100644
index 000000000..068a62c31
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/header_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const HeaderExecutor::COMMAND_NAME = "header";
+
+/**
+ * Entry point of the 'header' command.
+ *
+ * Placeholder implementation: both arguments are ignored, a "not implemented" notice is written
+ * to stderr and 0 is returned.
+ */
+/* static */ int HeaderExecutor::run(const int argc, char **argv) {
+    // The arguments are intentionally unused until the command is implemented.
+    (void)argc;
+    (void)argv;
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints the help entry of the 'header' command to stdout: a "***" title line, a usage line and
+ * a one-sentence description followed by an empty line.
+ */
+/* static */ void HeaderExecutor::printUsage() {
+    // Single formatted write; the command name appears in both the title and the usage line.
+    std::printf("*** %s\n"
+            "Usage: %s\n"
+            "Prints the header contents of a dictionary file.\n\n",
+            COMMAND_NAME, COMMAND_NAME);
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h
new file mode 100644
index 000000000..4cdeb1a99
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the 'header' command. All members are static.
+ */
+class HeaderExecutor final {
+ public:
+    // Command name as typed on the command line.
+    static const char *const COMMAND_NAME;
+
+    // Runs the command with the given command line arguments and returns its exit code.
+    static int run(int argc, char **argv);
+    // Prints the help entry of this command to stdout.
+    static void printUsage();
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp
new file mode 100644
index 000000000..bd29a5b16
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/help_executor.h"
+
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+#include "utils/command_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const HelpExecutor::COMMAND_NAME = "help";
+
+/**
+ * Entry point of the 'help' command.
+ *
+ * Writes an "Available commands:" heading to stdout followed by the help entry of every command
+ * in this order: diff, header, info, makedict, help. The arguments are ignored.
+ *
+ * @return always 0.
+ */
+/* static */ int HelpExecutor::run(const int argc, char **argv) {
+    (void)argc;
+    (void)argv;
+    // Usage printers listed in output order.
+    using UsagePrinter = void (*)();
+    const UsagePrinter usagePrinters[] = {
+        &DiffExecutor::printUsage,
+        &HeaderExecutor::printUsage,
+        &InfoExecutor::printUsage,
+        &MakedictExecutor::printUsage,
+        &HelpExecutor::printUsage,
+    };
+    printf("Available commands:\n\n");
+    for (const UsagePrinter usagePrinter : usagePrinters) {
+        usagePrinter();
+    }
+    return 0;
+}
+
+/**
+ * Prints the help entry of the 'help' command to stdout: a "***" title line, a usage line and a
+ * one-sentence description followed by an empty line.
+ */
+/* static */ void HelpExecutor::printUsage() {
+    // Single formatted write; the command name appears in both the title and the usage line.
+    std::printf("*** %s\n"
+            "Usage: %s\n"
+            "Show this help list.\n\n",
+            COMMAND_NAME, COMMAND_NAME);
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h
new file mode 100644
index 000000000..280610eb9
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the 'help' command. All members are static.
+ */
+class HelpExecutor final {
+ public:
+    // Command name as typed on the command line.
+    static const char *const COMMAND_NAME;
+
+    // Runs the command with the given command line arguments and returns its exit code.
+    static int run(int argc, char **argv);
+    // Prints the help entry of this command to stdout.
+    static void printUsage();
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp
new file mode 100644
index 000000000..c4d84cab3
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/info_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const InfoExecutor::COMMAND_NAME = "info";
+
+/**
+ * Entry point of the 'info' command.
+ *
+ * Placeholder implementation: both arguments are ignored, a "not implemented" notice is written
+ * to stderr and 0 is returned.
+ */
+/* static */ int InfoExecutor::run(const int argc, char **argv) {
+    // The arguments are intentionally unused until the command is implemented.
+    (void)argc;
+    (void)argv;
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints the help entry of the 'info' command to stdout: a "***" title line, a usage line and a
+ * one-sentence description followed by an empty line.
+ */
+/* static */ void InfoExecutor::printUsage() {
+    // Single formatted write; the command name appears in both the title and the usage line.
+    std::printf("*** %s\n"
+            "Usage: %s\n"
+            "Prints various information about a dictionary file.\n\n",
+            COMMAND_NAME, COMMAND_NAME);
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h
new file mode 100644
index 000000000..4ffa74fb0
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the 'info' command. All members are static.
+ */
+class InfoExecutor final {
+ public:
+    // Command name as typed on the command line.
+    static const char *const COMMAND_NAME;
+
+    // Runs the command with the given command line arguments and returns its exit code.
+    static int run(int argc, char **argv);
+    // Prints the help entry of this command to stdout.
+    static void printUsage();
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
new file mode 100644
index 000000000..ea62e3c37
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/makedict_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const MakedictExecutor::COMMAND_NAME = "makedict";
+
+/**
+ * Entry point of the 'makedict' command.
+ *
+ * Placeholder implementation: both arguments are ignored, a "not implemented" notice is written
+ * to stderr and 0 is returned.
+ */
+/* static */ int MakedictExecutor::run(const int argc, char **argv) {
+    // The arguments are intentionally unused until the command is implemented.
+    (void)argc;
+    (void)argv;
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints the help entry of the 'makedict' command to stdout: a "***" title line, a usage line
+ * and a three-line description followed by an empty line.
+ */
+/* static */ void MakedictExecutor::printUsage() {
+    // Single formatted write; the command name appears in both the title and the usage line.
+    std::printf("*** %s\n"
+            "Usage: %s\n"
+            "Converts a source dictionary file to one or several outputs.\n"
+            "Source can be a binary dictionary file or a combined format file.\n"
+            "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.\n\n",
+            COMMAND_NAME, COMMAND_NAME);
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h
new file mode 100644
index 000000000..ae1309f60
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Executor of the 'makedict' command. All members are static.
+ */
+class MakedictExecutor final {
+ public:
+    // Command name as typed on the command line.
+    static const char *const COMMAND_NAME;
+
+    // Runs the command with the given command line arguments and returns its exit code.
+    static int run(int argc, char **argv);
+    // Prints the help entry of this command to stdout.
+    static void printUsage();
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h
index 2a2104e26..dbaae0ca0 100644
--- a/native/dicttoolkit/src/dict_toolkit_defines.h
+++ b/native/dicttoolkit/src/dict_toolkit_defines.h
@@ -19,4 +19,6 @@
#include "defines.h"
+#define MIN_ARG_COUNT 2
+
#endif // LATINIME_DICT_TOOLKIT_DEFINES_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
new file mode 100644
index 000000000..af28131cf
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Adds the word described by wordProperty to the dictionary.
+ *
+ * @return false when the word is empty, is longer than MAX_WORD_LENGTH code points, or when the
+ *         insertion into the trie fails (e.g. the same word has already been added);
+ *         true otherwise.
+ */
+bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
+    const CodePointArrayView wordCodePoints = wordProperty.getCodePoints();
+    const bool hasValidLength =
+            !wordCodePoints.empty() && wordCodePoints.size() <= MAX_WORD_LENGTH;
+    // Insertion always starts from the root PtNode array.
+    return hasValidLength && addWordInner(wordCodePoints, wordProperty, mRootPtNodeArray);
+}
+
+/**
+ * Inserts the (remaining) code points of a word into ptNodeArray, recursing into children.
+ *
+ * Sibling nodes are scanned from the front: nodes whose first code point is greater than
+ * codePoints[0] are skipped and a new node is inserted in front of the first node whose first
+ * code point is smaller, i.e. siblings end up in descending order of their first code point.
+ *
+ * @param codePoints non-empty code points that still have to be placed in the trie.
+ * @param wordProperty property of the whole word; copied into the terminal node.
+ * @param ptNodeArray sibling array to insert into.
+ * @return false if a terminal node for exactly this word already exists, true otherwise.
+ */
+bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
+        const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
+    using PtNode = OffdeviceIntermediateDictPtNode;
+    auto *const siblings = ptNodeArray.getMutablePtNodeList();
+
+    // Skip the siblings that sort in front of the new word.
+    auto position = siblings->begin();
+    while (position != siblings->end()
+            && codePoints[0] < (*position)->getPtNodeCodePoints()[0]) {
+        ++position;
+    }
+    if (position == siblings->end()
+            || codePoints[0] > (*position)->getPtNodeCodePoints()[0]) {
+        // No sibling shares the first code point: add a brand new terminal node here.
+        siblings->insert(position, std::make_shared<PtNode>(codePoints, wordProperty));
+        return true;
+    }
+
+    // From here on *position starts with the same code point as the new word.
+    const auto &existingPtNode = *position;
+    const CodePointArrayView existingCodePoints = existingPtNode->getPtNodeCodePoints();
+
+    // Count the code points shared by the word and the existing node.
+    size_t matchedCount = 1;
+    while (matchedCount < codePoints.size()) {
+        if (matchedCount >= existingCodePoints.size()) {
+            // The existing node is a proper prefix of the word: continue in its children.
+            return addWordInner(codePoints.skip(matchedCount), wordProperty,
+                    existingPtNode->getChildrenPtNodeArray());
+        }
+        if (codePoints[matchedCount] != existingCodePoints[matchedCount]) {
+            break;
+        }
+        ++matchedCount;
+    }
+    const bool wordFullyMatched = (codePoints.size() == matchedCount);
+
+    if (wordFullyMatched && codePoints.size() == existingCodePoints.size()) {
+        // The node spells exactly this word.
+        if (existingPtNode->getWordProperty() != nullptr) {
+            // Adding the same word multiple times is not supported.
+            return false;
+        }
+        // Replace the non-terminal node with a terminal copy carrying the word property.
+        siblings->insert(position, std::make_shared<PtNode>(wordProperty, *existingPtNode));
+        siblings->erase(position);
+        return true;
+    }
+
+    // The node has to be split after matchedCount code points. The new parent holds the common
+    // part; it is terminal only when the word itself ends at the split point.
+    std::shared_ptr<PtNode> commonParent;
+    if (wordFullyMatched) {
+        commonParent = std::make_shared<PtNode>(codePoints, wordProperty);
+    } else {
+        commonParent = std::make_shared<PtNode>(codePoints.limit(matchedCount));
+    }
+    siblings->insert(position, commonParent);
+    OffdeviceIntermediateDictPtNodeArray &splitChildren = commonParent->getChildrenPtNodeArray();
+    // Re-attach the existing node below the new parent with its remaining code points. This must
+    // happen before the erase below because existingCodePoints refers to the existing node.
+    splitChildren.getMutablePtNodeList()->push_back(
+            std::make_shared<PtNode>(existingCodePoints.skip(matchedCount), *existingPtNode));
+    siblings->erase(position);
+    if (wordFullyMatched) {
+        return true;
+    }
+    // Add a child for the rest of the new word.
+    return addWordInner(codePoints.skip(matchedCount), wordProperty, splitChildren);
+}
+
+/**
+ * Looks up the word spelled by codePoints.
+ *
+ * Walks the trie from the root, consuming the code points of one matching PtNode per level.
+ *
+ * @param codePoints code points of the word to look up.
+ * @return the WordProperty stored in the node that ends exactly at the end of codePoints, or
+ *         nullptr when there is no such node, when that node is not a terminal, or when
+ *         codePoints is empty. The returned pointer is owned by the dictionary.
+ */
+const WordProperty *OffdeviceIntermediateDict::getWordProperty(
+        const CodePointArrayView codePoints) const {
+    const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
+    for (size_t i = 0; i < codePoints.size();) {
+        bool foundNext = false;
+        // Bind by reference: copying the shared_ptr would only add reference count traffic.
+        for (const auto &ptNode : ptNodeArray->getPtNodeList()) {
+            const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+            if (codePoints[i] < ptNodeCodePoints[0]) {
+                continue;
+            }
+            // Compare against the number of code points that are still unmatched (size - i),
+            // not the total length; otherwise codePoints[i + j] below could read past the end
+            // when a node deeper in the trie is longer than the rest of the query.
+            if (codePoints[i] > ptNodeCodePoints[0]
+                    || codePoints.size() - i < ptNodeCodePoints.size()) {
+                return nullptr;
+            }
+            for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
+                if (codePoints[i + j] != ptNodeCodePoints[j]) {
+                    return nullptr;
+                }
+            }
+            i += ptNodeCodePoints.size();
+            if (i == codePoints.size()) {
+                return ptNode->getWordProperty();
+            }
+            // Descend: the remaining code points are searched among this node's children.
+            ptNodeArray = &ptNode->getChildrenPtNodeArray();
+            foundNext = true;
+            break;
+        }
+        if (!foundNext) {
+            break;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
new file mode 100644
index 000000000..13d26ba91
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * In-memory patricia trie that represents a dictionary.
+ *
+ * Holds a copy of the header it was created with and the root PtNode array of the trie.
+ */
+class OffdeviceIntermediateDict final {
+ public:
+    OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header)
+            : mHeader(header), mRootPtNodeArray() {}
+
+    // Adds a word to the dictionary. Returns whether the word has been added.
+    bool addWord(const WordProperty &wordProperty);
+
+    // Returns the property of the word spelled by codePoints, or nullptr.
+    // The returned value will be invalid after modifying the dictionary, e.g. calling addWord().
+    const WordProperty *getWordProperty(CodePointArrayView codePoints) const;
+
+    const OffdeviceIntermediateDictHeader &getHeader() const {
+        return mHeader;
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
+
+    // Recursive worker of addWord(): inserts codePoints into ptNodeArray or one of its
+    // descendants.
+    bool addWordInner(CodePointArrayView codePoints, const WordProperty &wordProperty,
+            OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
+
+    const OffdeviceIntermediateDictHeader mHeader;
+    OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
new file mode 100644
index 000000000..440627a79
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+
+#include <map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Header of an off-device intermediate dictionary: an immutable map of attributes.
+class OffdeviceIntermediateDictHeader final {
+ public:
+ // Attribute key -> attribute value, both stored as int vectors.
+ // NOTE(review): presumably the ints are code points of the key/value strings -- confirm.
+ using AttributeMap = std::map<std::vector<int>, std::vector<int>>;
+
+ // Stores a copy of attributesMap.
+ OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap)
+ : mAttributeMap(attributesMap) {}
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader);
+ DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader);
+
+ // No accessor is declared in this class, so the map cannot be read from outside it.
+ const AttributeMap mAttributeMap;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
new file mode 100644
index 000000000..721ccd778
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * A node of the off-device intermediate patricia trie.
+ *
+ * A node owns its code points, an array of child nodes and, when it is a terminal, a copy of
+ * the WordProperty of the word that ends at this node. Nodes are immutable apart from their
+ * children array; changing code points or the word property is done by building a new node
+ * with one of the "replacing" constructors.
+ */
+class OffdeviceIntermediateDictPtNode final {
+ public:
+    // Non-terminal: a node without a WordProperty and without children.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(nullptr) {}
+
+    // Terminal: a node without children that stores a copy of wordProperty.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const WordProperty &wordProperty)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Replacing PtNodeCodePoints: takes the children array and the WordProperty of ptNode.
+    // ptNode may be a non-terminal (no WordProperty); the new node is then a non-terminal too.
+    // The source pointer must be checked because dereferencing a null unique_ptr is undefined
+    // behavior.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(ptNode.mWordProperty
+                      ? new WordProperty(*ptNode.mWordProperty) : nullptr) {}
+
+    // Replacing WordProperty: takes the code points and the children array of ptNode and stores
+    // a copy of wordProperty, which makes the new node a terminal.
+    OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Returns the WordProperty of this node, or nullptr when the node is a non-terminal.
+    // The pointee is owned by this node.
+    const WordProperty *getWordProperty() const {
+        return mWordProperty.get();
+    }
+
+    // Returns a view of the code points stored in this node.
+    const CodePointArrayView getPtNodeCodePoints() const {
+        return CodePointArrayView(mPtNodeCodePoints);
+    }
+
+    OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
+        return mChildrenPtNodeArray;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
+
+    const std::vector<int> mPtNodeCodePoints;
+    OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
+    // Null for non-terminal nodes.
+    const std::unique_ptr<WordProperty> mWordProperty;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
new file mode 100644
index 000000000..f87456ce0
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+
+#include <list>
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode;
+
+/**
+ * A list of sibling PtNodes of the off-device intermediate patricia trie. The nodes are held
+ * through shared pointers.
+ */
+class OffdeviceIntermediateDictPtNodeArray final {
+ public:
+    using PtNodeList = std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>>;
+
+    // Read-only access to the sibling list.
+    const PtNodeList &getPtNodeList() const {
+        return mPtNodes;
+    }
+
+    // Mutable access to the sibling list; the list stays owned by this object.
+    PtNodeList *getMutablePtNodeList() {
+        return &mPtNodes;
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
+
+    PtNodeList mPtNodes;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp
new file mode 100644
index 000000000..34196425e
--- /dev/null
+++ b/native/dicttoolkit/src/utils/command_utils.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/command_utils.h"
+
+#include <cstdio>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/help_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Resolves a command name to its CommandType by comparing it with the COMMAND_NAME of each
+// executor (Info, Diff, Makedict, Header, Help, in that order). A name that matches none of
+// them yields CommandType::Unknown.
+/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) {
+    if (commandName == InfoExecutor::COMMAND_NAME) {
+        return CommandType::Info;
+    }
+    if (commandName == DiffExecutor::COMMAND_NAME) {
+        return CommandType::Diff;
+    }
+    if (commandName == MakedictExecutor::COMMAND_NAME) {
+        return CommandType::Makedict;
+    }
+    if (commandName == HeaderExecutor::COMMAND_NAME) {
+        return CommandType::Header;
+    }
+    if (commandName == HelpExecutor::COMMAND_NAME) {
+        return CommandType::Help;
+    }
+    // Nothing matched.
+    return CommandType::Unknown;
+}
+
+// Writes a message to stderr saying that commandName is unknown and suggesting that the user
+// run programName with the help command.
+/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName,
+        const std::string &commandName) {
+    const char *const unknownCommand = commandName.c_str();
+    const char *const program = programName.c_str();
+    fprintf(stderr, "Command '%s' is unknown. Try '%s %s' for more information.\n",
+            unknownCommand, program, HelpExecutor::COMMAND_NAME);
+}
+
+// Returns the run() entry point of the executor that implements commandType. When no executor
+// exists for the type (for example CommandType::Unknown), the returned callable reports the
+// problem and returns 1, so the result can always be invoked.
+/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor(
+        const CommandType commandType) {
+    switch (commandType) {
+        case CommandType::Info:
+            return InfoExecutor::run;
+        case CommandType::Diff:
+            return DiffExecutor::run;
+        case CommandType::Makedict:
+            return MakedictExecutor::run;
+        case CommandType::Header:
+            return HeaderExecutor::run;
+        case CommandType::Help:
+            return HelpExecutor::run;
+        default:
+            // This is an error report, so it goes to stderr and ends with a newline, in the
+            // same way as printCommandUnknownMessage().
+            return [] (int, char **) -> int {
+                fprintf(stderr, "Command executor not found.\n");
+                return 1;
+            };
+    }
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h
new file mode 100644
index 000000000..4a181f194
--- /dev/null
+++ b/native/dicttoolkit/src/utils/command_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
+#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Identifies a dicttoolkit command. CommandUtils::getCommandType() produces these values from a
+// command name and CommandUtils::getCommandExecutor() maps them to an executor.
+enum class CommandType : int {
+ Info,
+ Diff,
+ Makedict,
+ Header,
+ Help,
+ // Used for a command name that matches no executor.
+ Unknown
+};
+
+// Static helpers for turning a command name into a CommandType and a CommandType into the
+// function that executes it.
+class CommandUtils {
+public:
+ // Returns the CommandType whose executor's COMMAND_NAME equals commandName, or
+ // CommandType::Unknown when no executor matches.
+ static CommandType getCommandType(const std::string &commandName);
+ // Prints to stderr that commandName is unknown and suggests running programName with the
+ // help command.
+ static void printCommandUnknownMessage(const std::string &programName,
+ const std::string &commandName);
+ // Returns the run function of the executor for commandType. For a type without an executor
+ // the returned callable prints an error message and returns 1.
+ static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType);
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils);
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp
new file mode 100644
index 000000000..0f349f512
--- /dev/null
+++ b/native/dicttoolkit/src/utils/utf8_utils.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8_utils.h"
+
+#include "utils/char_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Lookup tables indexed by UTF-8 sequence size in bytes (1 to 4). Index 0 is a placeholder so
+// that a sequence size can be used directly as an index.
+const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4;
+// Bits of the first byte that identify the sequence size, and the values they must have:
+// 0xxxxxxx, 110xxxxx, 1110xxxx and 11110xxx.
+const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8};
+const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0};
+// Bits of the first byte that carry code point data. The first byte of a 4-byte sequence
+// (11110xxx) has three such bits, so its mask is 0x07. A mask of 0x03 drops the top bit, which
+// makes U+100000..U+10FFFF decode to a too-small value that is then rejected as redundant.
+const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x07};
+// Largest code point that fits in a sequence of each size. The -1 at index 0 lets
+// MAX_ENCODED_CODE_POINT_VALUES[size - 1] act as the redundant-encoding bound for size 1 too.
+const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+
+// Layout of the second and later bytes (10xxxxxx): six code point bits below the 0x80 marker.
+const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F;
+const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80;
+const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6;
+
+// Decodes a UTF-8 string into its code points.
+//
+// Returns an empty vector (after logging the reason) when utf8Str is not well-formed: an
+// invalid first byte, a trailing byte that is not of the form 10xxxxxx, a sequence cut short
+// by the end of the string, a redundant (overlong) encoding, or a value above the largest
+// encodable code point. An empty input also yields an empty vector.
+/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) {
+    // Bits that distinguish a trailing byte (10xxxxxx) from every other kind of byte.
+    static const uint8_t TRAILING_BYTE_MARKER_MASK = 0xC0;
+    std::vector<int> codePoints;
+    int remainingByteCountForCurrentCodePoint = 0;
+    int currentCodePointSequenceSize = 0;
+    int codePoint = 0;
+    for (const char c : utf8Str) {
+        // Work on the unsigned value: plain char may be signed, in which case bytes >= 0x80
+        // would be sign-extended when logged with %x.
+        const uint8_t byte = static_cast<uint8_t>(c);
+        if (remainingByteCountForCurrentCodePoint == 0) {
+            currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(byte);
+            if (currentCodePointSequenceSize <= 0) {
+                AKLOGE("%x is an invalid utf8 first byte value.", byte);
+                return std::vector<int>();
+            }
+            remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize;
+            codePoint = maskFirstByte(byte, currentCodePointSequenceSize);
+        } else {
+            // Without this check a stray first byte or ASCII character inside a sequence would
+            // be folded into the code point silently.
+            if ((byte & TRAILING_BYTE_MARKER_MASK) != TRAILING_BYTE_MARKER) {
+                AKLOGE("%x is an invalid utf8 trailing byte value.", byte);
+                return std::vector<int>();
+            }
+            codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+            codePoint += maskTrailingByte(byte);
+        }
+        remainingByteCountForCurrentCodePoint--;
+        if (remainingByteCountForCurrentCodePoint > 0) {
+            continue;
+        }
+        // The sequence is complete. A value that would have fit in a shorter sequence is a
+        // redundant encoding and is rejected.
+        if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) {
+            AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.",
+                    currentCodePointSequenceSize, codePoint);
+            return std::vector<int>();
+        }
+        // First bytes 0xF5..0xF7 pass the marker test but encode values beyond the maximum.
+        if (codePoint > MAX_ENCODED_CODE_POINT_VALUES[MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT]) {
+            AKLOGE("codePoint(%x) is larger than the maximum code point.", codePoint);
+            return std::vector<int>();
+        }
+        codePoints.push_back(codePoint);
+    }
+    if (remainingByteCountForCurrentCodePoint != 0) {
+        // The string ended while trailing bytes were still expected.
+        AKLOGE("The string ends in the middle of a %d bytes UTF-8 sequence.",
+                currentCodePointSequenceSize);
+        return std::vector<int>();
+    }
+    return codePoints;
+}
+
+// Determines the length of a UTF-8 sequence from its first byte by testing the byte against
+// the marker of every sequence size from 1 to MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT.
+// Returns the sequence size in bytes, or -1 when firstByte matches none of the markers.
+/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) {
+    size_t sequenceSize = 1;
+    while (sequenceSize <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) {
+        const uint8_t markerBits = firstByte & FIRST_BYTE_MARKER_MASKS[sequenceSize];
+        if (markerBits == FIRST_BYTE_MARKERS[sequenceSize]) {
+            return static_cast<int>(sequenceSize);
+        }
+        ++sequenceSize;
+    }
+    // No marker matched, so this cannot be the first byte of a utf8 sequence.
+    return -1;
+}
+
+// Extracts the code point bits from the first byte of a sequence that is sequenceSize bytes
+// long. sequenceSize indexes FIRST_BYTE_CODE_POINT_BITS_MASKS without a range check.
+/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte,
+        const int sequenceSize) {
+    const uint8_t codePointBitsMask = FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize];
+    return firstByte & codePointBitsMask;
+}
+
+// Extracts the code point bits from a second or later byte of a sequence. The marker bits are
+// discarded without being checked.
+/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) {
+    const int codePointBits = secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK;
+    return codePointBits;
+}
+
+// Encodes a sequence of code points as a UTF-8 string.
+//
+// Returns an empty string (after logging) as soon as one code point cannot be encoded, that
+// is, when getSequenceSizeToEncodeCodePoint() reports no valid sequence size for it.
+/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) {
+    const int bitsPerTrailingByte = static_cast<int>(CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE);
+    std::string encoded;
+    for (const int codePoint : codePoints) {
+        const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint);
+        if (sequenceSize <= 0) {
+            AKLOGE("Cannot encode code point (%d).", codePoint);
+            return std::string();
+        }
+        // Number of code point bits stored below the part that is emitted next. It starts
+        // with everything that goes into the trailing bytes.
+        int lowerBitCount = (sequenceSize - 1) * bitsPerTrailingByte;
+        // First byte: the highest bits of the code point combined with the size marker.
+        const int firstByteBits = codePoint >> lowerBitCount;
+        encoded.push_back(static_cast<char>(firstByteBits | FIRST_BYTE_MARKERS[sequenceSize]));
+        // Second and later bytes: six bits each, from the highest remaining bits downwards.
+        while (lowerBitCount > 0) {
+            lowerBitCount -= bitsPerTrailingByte;
+            const int trailingByteBits =
+                    (codePoint >> lowerBitCount) & TRAILING_BYTE_CODE_POINT_BITS_MASK;
+            encoded.push_back(static_cast<char>(trailingByteBits | TRAILING_BYTE_MARKER));
+        }
+    }
+    return encoded;
+}
+
+// Returns the number of bytes needed to encode codePoint in UTF-8, or -1 when codePoint is
+// negative or larger than every entry of MAX_ENCODED_CODE_POINT_VALUES.
+/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) {
+    if (codePoint >= 0) {
+        // The smallest sequence size whose maximum value can hold the code point wins.
+        for (size_t sequenceSize = 1; sequenceSize <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT;
+                ++sequenceSize) {
+            if (MAX_ENCODED_CODE_POINT_VALUES[sequenceSize] >= codePoint) {
+                return static_cast<int>(sequenceSize);
+            }
+        }
+    }
+    return -1;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h
new file mode 100644
index 000000000..35818e56c
--- /dev/null
+++ b/native/dicttoolkit/src/utils/utf8_utils.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
+#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Static helpers that convert between UTF-8 encoded strings and sequences of code points.
+class Utf8Utils {
+public:
+    // Decodes utf8Str into code points. Returns an empty vector when the string cannot be
+    // decoded.
+    static std::vector<int> getCodePoints(const std::string &utf8Str);
+    // Encodes codePoints as a UTF-8 string. Returns an empty string when any of the code points
+    // cannot be encoded.
+    static std::string getUtf8String(const CodePointArrayView codePoints);
+
+private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils);
+
+    // Largest number of bytes used to encode one code point.
+    static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT;
+    // Values indexed by sequence size.
+    static const uint8_t FIRST_BYTE_MARKER_MASKS[];
+    static const uint8_t FIRST_BYTE_MARKERS[];
+    static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[];
+    static const int MAX_ENCODED_CODE_POINT_VALUES[];
+
+    // Values describing the second and later bytes of a sequence.
+    static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK;
+    static const uint8_t TRAILING_BYTE_MARKER;
+    static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+
+    // Returns the sequence size indicated by firstByte, or -1 if it is not a valid first byte.
+    static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte);
+    // Returns the code point bits of the first byte of a sequence of sequenceSize bytes. The
+    // parameter name matches the one used by the definition.
+    static int maskFirstByte(const uint8_t firstByte, const int sequenceSize);
+    // Returns the code point bits of a second or later byte of a sequence.
+    static int maskTrailingByte(const uint8_t secondOrLaterByte);
+    // Returns the number of bytes needed to encode codePoint, or -1 if it cannot be encoded.
+    static int getSequenceSizeToEncodeCodePoint(const int codePoint);
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H