aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/dicttoolkit/Android.mk67
-rw-r--r--native/dicttoolkit/CleanupNativeFileList.mk17
-rw-r--r--native/dicttoolkit/NativeFileList.mk43
-rw-r--r--native/dicttoolkit/UnitTests.mk69
-rw-r--r--native/dicttoolkit/dict_toolkit_main.cpp39
-rwxr-xr-xnative/dicttoolkit/run_tests.sh34
-rw-r--r--native/dicttoolkit/src/command_executors/diff_executor.cpp49
-rw-r--r--native/dicttoolkit/src/command_executors/diff_executor.h40
-rw-r--r--native/dicttoolkit/src/command_executors/header_executor.cpp48
-rw-r--r--native/dicttoolkit/src/command_executors/header_executor.h40
-rw-r--r--native/dicttoolkit/src/command_executors/help_executor.cpp52
-rw-r--r--native/dicttoolkit/src/command_executors/help_executor.h38
-rw-r--r--native/dicttoolkit/src/command_executors/info_executor.cpp54
-rw-r--r--native/dicttoolkit/src/command_executors/info_executor.h40
-rw-r--r--native/dicttoolkit/src/command_executors/makedict_executor.cpp55
-rw-r--r--native/dicttoolkit/src/command_executors/makedict_executor.h40
-rw-r--r--native/dicttoolkit/src/dict_toolkit_defines.h24
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp126
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h54
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h44
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h79
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h48
-rw-r--r--native/dicttoolkit/src/utils/arguments_and_options.h54
-rw-r--r--native/dicttoolkit/src/utils/arguments_parser.cpp84
-rw-r--r--native/dicttoolkit/src/utils/arguments_parser.h118
-rw-r--r--native/dicttoolkit/src/utils/command_utils.cpp74
-rw-r--r--native/dicttoolkit/src/utils/command_utils.h50
-rw-r--r--native/dicttoolkit/src/utils/utf8_utils.cpp119
-rw-r--r--native/dicttoolkit/src/utils/utf8_utils.h56
-rw-r--r--native/dicttoolkit/tests/command_executors/diff_executor_test.cpp31
-rw-r--r--native/dicttoolkit/tests/command_executors/header_executor_test.cpp31
-rw-r--r--native/dicttoolkit/tests/command_executors/info_executor_test.cpp31
-rw-r--r--native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp31
-rw-r--r--native/dicttoolkit/tests/dict_toolkit_defines_test.cpp32
-rw-r--r--native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp85
-rw-r--r--native/dicttoolkit/tests/utils/command_utils_test.cpp37
-rw-r--r--native/dicttoolkit/tests/utils/utf8_utils_test.cpp85
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp12
-rw-r--r--native/jni/src/defines.h67
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp1
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h5
-rw-r--r--native/jni/src/suggest/core/dictionary/property/historical_info.h1
-rw-r--r--native/jni/src/suggest/core/dictionary/property/word_property.h21
-rw-r--r--native/jni/src/suggest/core/policy/scoring.h2
-rw-r--r--native/jni/src/suggest/core/result/suggestions_output_utils.cpp66
-rw-r--r--native/jni/src/suggest/core/result/suggestions_output_utils.h9
-rw-r--r--native/jni/src/suggest/core/suggest.cpp21
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h8
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp1
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.h1
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_scoring.h47
-rw-r--r--native/jni/src/utils/int_array_view.h23
-rw-r--r--native/jni/src/utils/profiler.h86
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp4
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp8
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp8
-rw-r--r--native/jni/tests/utils/int_array_view_test.cpp47
71 files changed, 2392 insertions, 180 deletions
diff --git a/native/dicttoolkit/Android.mk b/native/dicttoolkit/Android.mk
new file mode 100644
index 000000000..118682dfc
--- /dev/null
+++ b/native/dicttoolkit/Android.mk
@@ -0,0 +1,67 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ifeq (,$(TARGET_BUILD_APPS))
+
+# Only build if it's explicitly requested, or running mm/mmm.
+ifneq ($(ONE_SHOT_MAKEFILE)$(filter $(MAKECMDGOALS),dicttoolkit),)
+
+# HACK: Temporarily disable host tool build on Mac until the build system is ready for C++11.
+LATINIME_HOST_OSNAME := $(shell uname -s)
+ifneq ($(LATINIME_HOST_OSNAME), Darwin) # TODO: Remove this
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LATIN_IME_CORE_PATH := $(LOCAL_PATH)/../jni
+
+LATIN_IME_DICT_TOOLKIT_SRC_DIR := src
+LATIN_IME_CORE_SRC_DIR := ../jni/src
+
+LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \
+    -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls \
+    -Woverloaded-virtual -Wsign-promo -Wno-system-headers
+
+# To suppress compiler warnings for unused variables/functions used for debug features etc.
+# These must stay after -Wall/-Wextra above so that they take effect.
+LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
+# TODO: Remove -std=c++11 once it is set by default on host build.
+LOCAL_CFLAGS += -std=c++11
+
+# Both file lists only define variables; they are combined into LOCAL_SRC_FILES below.
+include $(LOCAL_PATH)/NativeFileList.mk
+include $(LATIN_IME_CORE_PATH)/NativeFileList.mk
+
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \
+    $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR)
+
+LOCAL_SRC_FILES := $(LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES) \
+    $(addprefix $(LATIN_IME_DICT_TOOLKIT_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_SRC_FILES)) \
+    $(addprefix $(LATIN_IME_CORE_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES))
+
+LOCAL_MODULE := dicttoolkit
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_CLANG := true
+LOCAL_CXX_STL := libc++
+
+include $(BUILD_HOST_EXECUTABLE)
+#################### Clean up the tmp vars
+include $(LOCAL_PATH)/CleanupNativeFileList.mk
+#################### Unit test
+include $(LOCAL_PATH)/UnitTests.mk
+
+endif # Darwin - TODO: Remove this
+
+endif # ONE_SHOT_MAKEFILE / MAKECMDGOALS
+
+endif # TARGET_BUILD_APPS
diff --git a/native/dicttoolkit/CleanupNativeFileList.mk b/native/dicttoolkit/CleanupNativeFileList.mk
new file mode 100644
index 000000000..b804b41ed
--- /dev/null
+++ b/native/dicttoolkit/CleanupNativeFileList.mk
@@ -0,0 +1,17 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Reset the file list variables assigned by NativeFileList.mk in this directory.
+LATIN_IME_DICT_TOOLKIT_TEST_FILES :=
+LATIN_IME_DICT_TOOLKIT_SRC_FILES :=
+LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES :=
diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk
new file mode 100644
index 000000000..d2c8c3a2c
--- /dev/null
+++ b/native/dicttoolkit/NativeFileList.mk
@@ -0,0 +1,43 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Source file holding main(); listed relative to this directory.
+LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := dict_toolkit_main.cpp
+
+# Tool sources. The including makefile prepends the source directory to each entry.
+LATIN_IME_DICT_TOOLKIT_SRC_FILES := \
+    command_executors/diff_executor.cpp \
+    command_executors/header_executor.cpp \
+    command_executors/help_executor.cpp \
+    command_executors/info_executor.cpp \
+    command_executors/makedict_executor.cpp \
+    offdevice_intermediate_dict/offdevice_intermediate_dict.cpp \
+    utils/arguments_parser.cpp \
+    utils/command_utils.cpp \
+    utils/utf8_utils.cpp
+
+# Unit test sources. The including makefile prepends the test directory to each entry.
+LATIN_IME_DICT_TOOLKIT_TEST_FILES := \
+    command_executors/diff_executor_test.cpp \
+    command_executors/header_executor_test.cpp \
+    command_executors/info_executor_test.cpp \
+    command_executors/makedict_executor_test.cpp \
+    dict_toolkit_defines_test.cpp \
+    offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp \
+    utils/command_utils_test.cpp \
+    utils/utf8_utils_test.cpp
diff --git a/native/dicttoolkit/UnitTests.mk b/native/dicttoolkit/UnitTests.mk
new file mode 100644
index 000000000..96e28730e
--- /dev/null
+++ b/native/dicttoolkit/UnitTests.mk
@@ -0,0 +1,69 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ifeq (,$(TARGET_BUILD_APPS))
+
+LOCAL_PATH := $(call my-dir)
+
+######################################
+# Static library holding the tool and core sources (without main()) for the unit tests.
+include $(CLEAR_VARS)
+
+LATIN_IME_CORE_PATH := $(LOCAL_PATH)/../jni
+
+LATIN_IME_DICT_TOOLKIT_SRC_DIR := src
+LATIN_IME_CORE_SRC_DIR := ../jni/src
+LATIN_DICT_TOOLKIT_TEST_SRC_DIR := tests
+
+include $(LOCAL_PATH)/NativeFileList.mk
+include $(LATIN_IME_CORE_PATH)/NativeFileList.mk
+
+# TODO: Remove -std=c++11 once it is set by default on host build.
+LATIN_IME_SRC_DIR := src
+LOCAL_ADDRESS_SANITIZER := true
+LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function
+LOCAL_CLANG := true
+LOCAL_CXX_STL := libc++
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \
+    $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR)
+LOCAL_MODULE := liblatinime_dicttoolkit_host_static_for_unittests
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := \
+    $(addprefix $(LATIN_IME_DICT_TOOLKIT_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_SRC_FILES)) \
+    $(addprefix $(LATIN_IME_CORE_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES))
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# Test executable; links against the static library defined above.
+include $(CLEAR_VARS)
+
+# TODO: Remove -std=c++11 once it is set by default on host build.
+LOCAL_ADDRESS_SANITIZER := true
+LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function
+LOCAL_CLANG := true
+LOCAL_CXX_STL := libc++
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \
+    $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR)
+LOCAL_MODULE := dicttoolkit_unittests
+LOCAL_MODULE_TAGS := tests
+LOCAL_SRC_FILES := \
+    $(addprefix $(LATIN_DICT_TOOLKIT_TEST_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_TEST_FILES))
+LOCAL_STATIC_LIBRARIES += liblatinime_dicttoolkit_host_static_for_unittests
+include $(BUILD_HOST_NATIVE_TEST)
+
+include $(LOCAL_PATH)/CleanupNativeFileList.mk
+
+#################### Clean up the tmp vars
+LATINIME_HOST_OSNAME :=
+LATIN_IME_SRC_DIR :=
+LATIN_IME_TEST_SRC_DIR :=
+# The test directory variable set in this file; it was previously left defined.
+LATIN_DICT_TOOLKIT_TEST_SRC_DIR :=
+
+endif # TARGET_BUILD_APPS
diff --git a/native/dicttoolkit/dict_toolkit_main.cpp b/native/dicttoolkit/dict_toolkit_main.cpp
new file mode 100644
index 000000000..53cc5e915
--- /dev/null
+++ b/native/dicttoolkit/dict_toolkit_main.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdio>
+
+#include "dict_toolkit_defines.h"
+#include "utils/command_utils.h"
+
+/**
+ * Prints a one-line usage summary to stderr.
+ *
+ * @param argc number of entries in argv.
+ * @param argv command line arguments; argv[0] is printed as the program name when argc > 0,
+ *         otherwise the fixed name "dicttoolkit" is printed.
+ */
+void usage(int argc, char **argv) {
+    const char *programName = "dicttoolkit";
+    if (argc > 0) {
+        programName = argv[0];
+    }
+    fprintf(stderr, "Usage: %s <command> [arguments]\n", programName);
+}
+
+/**
+ * Entry point of the tool: looks up the command named by argv[1] and runs its executor.
+ *
+ * @return 1 when too few arguments are given or the command is unknown, otherwise the value
+ *         returned by the command executor.
+ */
+int main(int argc, char **argv) {
+    // NOTE(review): argv[1] is read below, so MIN_ARG_COUNT is presumably at least 2 -- it is
+    // defined outside this file.
+    if (argc < MIN_ARG_COUNT) {
+        usage(argc, argv);
+        return 1;
+    }
+    using namespace latinime::dicttoolkit;
+    const CommandType type = CommandUtils::getCommandType(argv[1]);
+    if (type != CommandType::Unknown) {
+        // Drop the program name so that the executor sees the command name as its argv[0].
+        const auto commandExecutor = CommandUtils::getCommandExecutor(type);
+        return commandExecutor(argc - 1, argv + 1);
+    }
+    CommandUtils::printCommandUnknownMessage(argv[0], argv[1]);
+    return 1;
+}
diff --git a/native/dicttoolkit/run_tests.sh b/native/dicttoolkit/run_tests.sh
new file mode 100755
index 000000000..44c99c144
--- /dev/null
+++ b/native/dicttoolkit/run_tests.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright 2014, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make sure the 'mmm' shell function is available; the build step below depends on it.
+if [[ $(type -t mmm) != function ]]; then
+    echo "'mmm' is not defined as a shell function. Set up the build environment first." 1>&2
+    # A bare 'return' would propagate the status of the test above (success), so be explicit.
+    if [[ ${BASH_SOURCE[0]} != "$0" ]]; then return 1; else exit 1; fi
+fi
+
+# Host build is never supported in unbundled (NDK/tapas) build
+if [[ -n $TARGET_BUILD_APPS ]]; then
+    echo "Host build is never supported in tapas build." 1>&2
+    echo "Use lunch command instead." 1>&2
+    if [[ ${BASH_SOURCE[0]} != "$0" ]]; then return 1; else exit 1; fi
+fi
+
+test_name=dicttoolkit_unittests
+
+pushd "$PWD" > /dev/null
+cd "$(gettop)"
+# Run the tests only when one of the two build attempts succeeded, so that a stale or missing
+# binary is never executed after a failed build.
+if (mmm -j16 packages/inputmethods/LatinIME/native/dicttoolkit) || (make -j16 "$test_name"); then
+    "$ANDROID_HOST_OUT/bin/$test_name"
+else
+    echo "Failed to build $test_name." 1>&2
+fi
+popd > /dev/null
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp
new file mode 100644
index 000000000..bf6830686
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/diff_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const DiffExecutor::COMMAND_NAME = "diff";
+
+/**
+ * Placeholder for the "diff" command: reports on stderr that it is not implemented.
+ *
+ * Neither argc nor argv is consulted.
+ * NOTE(review): returns 0 although no work is done -- confirm callers do not treat this as
+ * a successful diff.
+ */
+/* static */ int DiffExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** diff" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a short description.
+ */
+/* static */ void DiffExecutor::printUsage() {
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, "Shows differences between two dictionaries.");
+}
+
+/**
+ * Builds the parser describing what the "diff" command accepts: the switch option "p" and
+ * two single arguments, "dict1" and "dict2".
+ *
+ * @return a newly constructed ArgumentsParser.
+ */
+/* static */ const ArgumentsParser DiffExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately not const: std::move() on a const object can only ever produce a copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict1", "dictionary file"),
+        ArgumentSpec::singleArgument("dict2", "dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h
new file mode 100644
index 000000000..f92ae49d5
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/diff_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class DiffExecutor final { // Static entry points of the "diff" command.
+ public:
+ static const char *const COMMAND_NAME; // Name of the command; defined in the .cpp file.
+
+ static int run(const int argc, char **argv); // Runs the command and returns an int status.
+ static void printUsage(); // Prints the usage text of this command.
+ static const ArgumentsParser getArgumentsParser(); // Describes accepted options/arguments.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor); // NOTE(review): presumably bars instantiation.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp
new file mode 100644
index 000000000..b3d273b4e
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/header_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const HeaderExecutor::COMMAND_NAME = "header";
+
+/**
+ * Placeholder for the "header" command: reports on stderr that it is not implemented.
+ *
+ * Neither argc nor argv is consulted.
+ * NOTE(review): returns 0 although no work is done -- confirm callers do not treat this as
+ * a successful run.
+ */
+/* static */ int HeaderExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** header" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a short description.
+ */
+/* static */ void HeaderExecutor::printUsage() {
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, "Prints the header contents of a dictionary file.");
+}
+
+/**
+ * Builds the parser describing what the "header" command accepts: the switch option "p" and
+ * one single argument, "dict".
+ *
+ * @return a newly constructed ArgumentsParser.
+ */
+/* static */ const ArgumentsParser HeaderExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately not const: std::move() on a const object can only ever produce a copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict", "prints the header contents of a dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h
new file mode 100644
index 000000000..44cc9cfc4
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/header_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class HeaderExecutor final { // Static entry points of the "header" command.
+ public:
+ static const char *const COMMAND_NAME; // Name of the command; defined in the .cpp file.
+
+ static int run(const int argc, char **argv); // Runs the command and returns an int status.
+ static void printUsage(); // Prints the usage text of this command.
+ static const ArgumentsParser getArgumentsParser(); // Describes accepted options/arguments.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor); // NOTE(review): presumably bars instantiation.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp
new file mode 100644
index 000000000..bd29a5b16
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/help_executor.h"
+
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+#include "utils/command_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const HelpExecutor::COMMAND_NAME = "help";
+
+/**
+ * Prints the list of available commands to stdout by invoking the printUsage() function of
+ * every command in turn, ending with the help command itself.
+ *
+ * Neither argc nor argv is consulted.
+ *
+ * @return always 0.
+ */
+/* static */ int HelpExecutor::run(const int argc, char **argv) {
+    using PrintUsageFunction = void (*)();
+    // Order here is the order in which the commands are listed.
+    const PrintUsageFunction usagePrinters[] = {
+        DiffExecutor::printUsage,
+        HeaderExecutor::printUsage,
+        InfoExecutor::printUsage,
+        MakedictExecutor::printUsage,
+        printUsage
+    };
+    printf("Available commands:\n\n");
+    for (const PrintUsageFunction usagePrinter : usagePrinters) {
+        usagePrinter();
+    }
+    return 0;
+}
+
+/**
+ * Prints the banner, the usage line and the description of the help command to stdout.
+ * The help command takes no options or arguments, so no ArgumentsParser is involved.
+ */
+/* static */ void HelpExecutor::printUsage() {
+    const char *const commandName = COMMAND_NAME;
+    std::printf("*** %s\n", commandName);
+    std::printf("Usage: %s\n", commandName);
+    std::printf("Show this help list.\n\n");
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h
new file mode 100644
index 000000000..280610eb9
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/help_executor.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class HelpExecutor final { // Static entry points of the "help" command.
+ public:
+ static const char *const COMMAND_NAME; // Name of the command; defined in the .cpp file.
+
+ static int run(const int argc, char **argv); // Prints the usage of every command; returns 0.
+ static void printUsage(); // Prints the usage text of the help command itself.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor); // NOTE(review): presumably bars instantiation.
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp
new file mode 100644
index 000000000..351da4aff
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/info_executor.h"
+
+#include <cstdio>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const InfoExecutor::COMMAND_NAME = "info";
+
+/**
+ * Placeholder for the "info" command: reports on stderr that it is not implemented.
+ *
+ * Neither argc nor argv is consulted.
+ * NOTE(review): returns 0 although no work is done -- confirm callers do not treat this as
+ * a successful run.
+ */
+/* static */ int InfoExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** info" banner to stdout, then lets the command's ArgumentsParser print the
+ * usage together with a short description.
+ */
+/* static */ void InfoExecutor::printUsage() {
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME, "Prints various information about a dictionary file.");
+}
+
+/**
+ * Builds the parser describing what the "info" command accepts: the switch option "p", one
+ * single argument "dict", and a variable-length "word" argument list that may be empty.
+ *
+ * @return a newly constructed ArgumentsParser.
+ */
+/* static */ const ArgumentsParser InfoExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script");
+
+    // Deliberately not const: std::move() on a const object can only ever produce a copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("dict", "dictionary file name"),
+        ArgumentSpec::variableLengthArguments("word", 0 /* minCount */,
+                ArgumentSpec::UNLIMITED_COUNT, "word to show information")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h
new file mode 100644
index 000000000..d4106d59f
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/info_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class InfoExecutor final { // Static entry points of the "info" command.
+ public:
+ static const char *const COMMAND_NAME; // Name of the command; defined in the .cpp file.
+
+ static int run(const int argc, char **argv); // Runs the command and returns an int status.
+ static void printUsage(); // Prints the usage text of this command.
+ static const ArgumentsParser getArgumentsParser(); // Describes accepted options/arguments.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor); // NOTE(review): presumably bars instantiation.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
new file mode 100644
index 000000000..8a84e8069
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/makedict_executor.h"
+
+#include <cstdio>
+
+namespace latinime {
+namespace dicttoolkit {
+
+const char *const MakedictExecutor::COMMAND_NAME = "makedict";
+
+/**
+ * Placeholder for the "makedict" command: reports on stderr that it is not implemented.
+ *
+ * Neither argc nor argv is consulted.
+ * NOTE(review): returns 0 although no work is done -- confirm callers do not treat this as
+ * a successful conversion.
+ */
+/* static */ int MakedictExecutor::run(const int argc, char **argv) {
+    std::fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME);
+    return 0;
+}
+
+/**
+ * Prints a "*** makedict" banner to stdout, then lets the command's ArgumentsParser print
+ * the usage together with a multi-line description.
+ */
+/* static */ void MakedictExecutor::printUsage() {
+    std::printf("*** %s\n", COMMAND_NAME);
+    const ArgumentsParser parser = getArgumentsParser();
+    parser.printUsage(COMMAND_NAME,
+            "Converts a source dictionary file to one or several outputs.\n"
+            "Source can be a binary dictionary file or a combined format file.\n"
+            "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.");
+}
+
+/**
+ * Builds the parser describing what the "makedict" command accepts: the key-value options
+ * "o" (format, default "2") and "t" (mode, default "off"), followed by the two single
+ * arguments "src_dict" and "dest_dict".
+ *
+ * @return a newly constructed ArgumentsParser.
+ */
+/* static */ const ArgumentsParser MakedictExecutor::getArgumentsParser() {
+    std::unordered_map<std::string, OptionSpec> optionSpecs;
+    optionSpecs["o"] = OptionSpec::keyValueOption("format", "2",
+            "output format version: 2/4/combined");
+    optionSpecs["t"] = OptionSpec::keyValueOption("mode", "off",
+            "code point table switch: on/off/auto");
+
+    // Deliberately not const: std::move() on a const object can only ever produce a copy.
+    std::vector<ArgumentSpec> argumentSpecs = {
+        ArgumentSpec::singleArgument("src_dict", "source dictionary file"),
+        ArgumentSpec::singleArgument("dest_dict", "output dictionary file")
+    };
+
+    return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs));
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h
new file mode 100644
index 000000000..c3de977a3
--- /dev/null
+++ b/native/dicttoolkit/src/command_executors/makedict_executor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class MakedictExecutor final { // Static-only entry points for the "makedict" sub-command.
+ public:
+ static const char *const COMMAND_NAME; // Name that selects this command; defined in makedict_executor.cpp.
+
+ static int run(const int argc, char **argv); // Runs the command and returns an int status.
+ static void printUsage(); // Prints a banner plus the usage text for this command to stdout.
+ static const ArgumentsParser getArgumentsParser(); // Returns the parser describing this command's options and arguments.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor); // Macro is defined outside this file; every member here is static.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H
diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h
new file mode 100644
index 000000000..dbaae0ca0
--- /dev/null
+++ b/native/dicttoolkit/src/dict_toolkit_defines.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_DEFINES_H
+#define LATINIME_DICT_TOOLKIT_DEFINES_H
+
+#include "defines.h"
+
+#define MIN_ARG_COUNT 2 // NOTE(review): presumably the minimum argc (program name + command name); confirm in dict_toolkit_main.cpp.
+
+#endif // LATINIME_DICT_TOOLKIT_DEFINES_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
new file mode 100644
index 000000000..af28131cf
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Adds the word described by wordProperty to the trie.
+ *
+ * @return false when the word has no code points, is longer than MAX_WORD_LENGTH, or when
+ *         addWordInner() rejects it; true otherwise.
+ */
+bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
+    const CodePointArrayView wordCodePoints = wordProperty.getCodePoints();
+    const bool hasAcceptableLength =
+            !wordCodePoints.empty() && wordCodePoints.size() <= MAX_WORD_LENGTH;
+    // Insertion always starts from the root array and recurses into children as needed.
+    return hasAcceptableLength && addWordInner(wordCodePoints, wordProperty, mRootPtNodeArray);
+}
+
+bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
+ const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { // Inserts codePoints (non-empty) into ptNodeArray, recursing into children; false if the word already exists.
+ auto ptNodeList = ptNodeArray.getMutablePtNodeList(); // Pointer to the sibling list that is edited in place.
+ auto ptNodeIt = ptNodeList->begin();
+ for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { // Siblings are kept in descending order of their first code point.
+ const auto &ptNode = *ptNodeIt; // Reference to the list element; dangles once that element is erased below.
+ const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+ if (codePoints[0] < ptNodeCodePoints[0]) {
+ continue; // Insertion point is further down the list.
+ }
+ if (codePoints[0] > ptNodeCodePoints[0]) {
+ break; // No sibling shares the first code point; insert before this one (after the loop).
+ }
+ size_t i = 1; // First code point matched; i becomes the length of the common prefix.
+ for (; i < codePoints.size(); ++i) {
+ if (i >= ptNodeCodePoints.size()) {
+ // Add new child.
+ return addWordInner(codePoints.skip(i), wordProperty,
+ ptNode->getChildrenPtNodeArray());
+ }
+ if (codePoints[i] != ptNodeCodePoints[i]) {
+ break;
+ }
+ }
+ if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
+ // All code points matched.
+ if (ptNode->getWordProperty()) {
+ // Adding the same word multiple times is not supported.
+ return false;
+ }
+ ptNodeList->insert(ptNodeIt,
+ std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode)); // Terminal copy of the node; children list is copied.
+ ptNodeList->erase(ptNodeIt); // Drop the old non-terminal node; ptNode must not be used after this.
+ return true;
+ }
+ // The (i+1)-th elements are different.
+ // Create and Add new parent ptNode for the common part.
+ auto newPtNode = codePoints.size() == i
+ ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty) // New word is a strict prefix of the node: the parent itself is terminal.
+ : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i)); // Otherwise the parent is a non-terminal holding only the shared prefix.
+ ptNodeList->insert(ptNodeIt, newPtNode);
+ OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
+ newPtNode->getChildrenPtNodeArray();
+ // Add new child for the existing ptNode.
+ childrenPtNodeArray.getMutablePtNodeList()->push_back(
+ std::make_shared<OffdeviceIntermediateDictPtNode>(
+ ptNodeCodePoints.skip(i), *ptNode)); // NOTE(review): relies on this constructor coping with a non-terminal *ptNode (null word property).
+ ptNodeList->erase(ptNodeIt); // Old node is now represented by the child above; ptNode and ptNodeCodePoints dangle from here.
+ if (codePoints.size() != i) {
+ // Add a child for the new word.
+ return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
+ }
+ return true;
+ }
+ ptNodeList->insert(ptNodeIt,
+ std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)); // No sibling shares the first code point: add a new terminal node at the sorted position.
+ return true;
+}
+
+/**
+ * Looks up a word in the trie.
+ *
+ * Walks down from the root, consuming one PtNode's code points per level. Sibling lists are
+ * ordered by descending first code point, so the scan of a level stops as soon as the wanted
+ * code point is greater than a sibling's first code point.
+ *
+ * @param codePoints the word to look up.
+ * @return the WordProperty stored at the node that exactly matches codePoints, or nullptr when
+ *         the word is empty, absent, or ends on a non-terminal node. The pointer is owned by the
+ *         dictionary.
+ */
+const WordProperty *OffdeviceIntermediateDict::getWordProperty(
+        const CodePointArrayView codePoints) const {
+    const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
+    for (size_t i = 0; i < codePoints.size();) {
+        bool foundNext = false;
+        // Iterate by reference: copying the shared_ptr would only add refcount traffic.
+        for (const auto &ptNode : ptNodeArray->getPtNodeList()) {
+            const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+            if (codePoints[i] < ptNodeCodePoints[0]) {
+                continue;
+            }
+            // Compare against the *remaining* length (size - i), not the whole word length;
+            // otherwise codePoints[i + j] below can read past the end of the query.
+            // i < codePoints.size() holds here, so the subtraction cannot wrap.
+            if (codePoints[i] > ptNodeCodePoints[0]
+                    || codePoints.size() - i < ptNodeCodePoints.size()) {
+                return nullptr;
+            }
+            for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
+                if (codePoints[i + j] != ptNodeCodePoints[j]) {
+                    return nullptr;
+                }
+            }
+            i += ptNodeCodePoints.size();
+            if (i == codePoints.size()) {
+                // Whole word consumed; nullptr here means the node is not a terminal.
+                return ptNode->getWordProperty();
+            }
+            // Descend. The node is owned by the list, so the pointer outlives this loop.
+            ptNodeArray = &ptNode->getChildrenPtNodeArray();
+            foundNext = true;
+            break;
+        }
+        if (!foundNext) {
+            break;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
new file mode 100644
index 000000000..13d26ba91
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * On memory patricia trie to represent a dictionary.
+ */
+class OffdeviceIntermediateDict final { // Holds a header plus the root PtNode array of the trie.
+ public:
+ OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header) // Copies the header; the trie starts empty.
+ : mHeader(header), mRootPtNodeArray() {}
+
+ bool addWord(const WordProperty &wordProperty); // Returns false for empty/too long words and for words that are already present.
+ // The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
+ const WordProperty *getWordProperty(const CodePointArrayView codePoints) const; // nullptr when the word is not in the dictionary.
+ const OffdeviceIntermediateDictHeader &getHeader() const { return mHeader; }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
+
+ const OffdeviceIntermediateDictHeader mHeader;
+ OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray; // Top-level siblings of the trie.
+
+ bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
+ OffdeviceIntermediateDictPtNodeArray &ptNodeArray); // Recursive worker behind addWord(); edits ptNodeArray in place.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
new file mode 100644
index 000000000..440627a79
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
+
+#include <map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictHeader final { // Immutable holder of the dictionary's header attributes.
+ public:
+ using AttributeMap = std::map<std::vector<int>, std::vector<int>>; // NOTE(review): keys/values look like code point sequences; confirm with the header readers.
+
+ OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap) // Stores a copy of attributesMap.
+ : mAttributeMap(attributesMap) {}
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader);
+ DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader);
+
+ const AttributeMap mAttributeMap; // No accessor is declared in this class yet.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
new file mode 100644
index 000000000..721ccd778
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * A node of the on-memory patricia trie.
+ *
+ * Each node owns its code point sequence, the array of its children, and, when the node is a
+ * terminal (i.e. a word ends here), a private copy of that word's WordProperty. A non-terminal
+ * node has a null word property.
+ */
+class OffdeviceIntermediateDictPtNode final {
+ public:
+    // Non-terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(nullptr) {}
+
+    // Terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const WordProperty &wordProperty)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Replacing PtNodeCodePoints.
+    // Children and terminal state are taken from ptNode. ptNode may be a non-terminal, in which
+    // case there is no WordProperty to copy and the new node is a non-terminal as well.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(ptNode.mWordProperty
+                      ? new WordProperty(*ptNode.mWordProperty) : nullptr) {}
+
+    // Replacing WordProperty.
+    // Code points and children are taken from ptNode; the result is always a terminal.
+    OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWordProperty(new WordProperty(wordProperty)) {}
+
+    // Returns nullptr for a non-terminal node. The pointer is owned by this node.
+    const WordProperty *getWordProperty() const {
+        return mWordProperty.get();
+    }
+
+    // The returned view points into this node's storage and is valid only while the node lives.
+    const CodePointArrayView getPtNodeCodePoints() const {
+        return CodePointArrayView(mPtNodeCodePoints);
+    }
+
+    OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
+        return mChildrenPtNodeArray;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
+
+    const std::vector<int> mPtNodeCodePoints;
+    OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
+    // Null when this node is not a terminal.
+    const std::unique_ptr<WordProperty> mWordProperty;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
new file mode 100644
index 000000000..f87456ce0
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+
+#include <list>
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode;
+
+class OffdeviceIntermediateDictPtNodeArray final { // A list of sibling PtNodes held through shared_ptr.
+ public:
+ const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const { // Read-only view of the siblings.
+ return mPtNodes;
+ }
+
+ std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() { // Never null; callers insert/erase nodes through it.
+ return &mPtNodes;
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
+
+ std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes; // Copying the array copies the pointers, not the nodes.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
diff --git a/native/dicttoolkit/src/utils/arguments_and_options.h b/native/dicttoolkit/src/utils/arguments_and_options.h
new file mode 100644
index 000000000..d8f5985e5
--- /dev/null
+++ b/native/dicttoolkit/src/utils/arguments_and_options.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H
+#define LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class ArgumentsAndOptions { // Immutable result of command line parsing: option values, positional arguments and a validity flag.
+ public:
+ ArgumentsAndOptions() : mIsValid(false), mOptions(), mArguments() {} // Builds an empty, invalid result.
+
+ ArgumentsAndOptions(std::unordered_map<std::string, std::string> &&options,
+ std::unordered_map<std::string, std::vector<std::string>> &&arguments)
+ : mIsValid(true), mOptions(std::move(options)), mArguments(std::move(arguments)) {} // Takes ownership of both maps and marks the result valid.
+
+ bool isValid() const {
+ return mIsValid;
+ }
+
+ bool hasOption(const std::string &optionName) const { // True when optionName is a key of the option map.
+ return mOptions.find(optionName) != mOptions.end();
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsAndOptions);
+
+ const bool mIsValid;
+ const std::unordered_map<std::string, std::string> mOptions; // Option name -> value.
+ const std::unordered_map<std::string, std::vector<std::string>> mArguments; // Argument name -> values; no accessor is declared yet.
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H
diff --git a/native/dicttoolkit/src/utils/arguments_parser.cpp b/native/dicttoolkit/src/utils/arguments_parser.cpp
new file mode 100644
index 000000000..039dae35b
--- /dev/null
+++ b/native/dicttoolkit/src/utils/arguments_parser.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/arguments_parser.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+const int ArgumentSpec::UNLIMITED_COUNT = -1; // NOTE(review): presumably passed as maxCount to mean "no upper bound"; no use is visible in this file.
+
+/**
+ * Checks that the argument specs can be parsed unambiguously.
+ *
+ * An argument whose minimum and maximum counts differ is only allowed in the last position.
+ * Logs an error and returns false on the first violation; returns true otherwise (including
+ * when there are no argument specs at all).
+ */
+bool ArgumentsParser::validateSpecs() const {
+    const size_t specCount = mArgumentSpecs.size();
+    // The last spec is exempt, so only positions before it are inspected.
+    for (size_t index = 0; index + 1 < specCount; ++index) {
+        const ArgumentSpec &argumentSpec = mArgumentSpecs[index];
+        const bool hasFixedCount = argumentSpec.getMinCount() == argumentSpec.getMaxCount();
+        if (!hasFixedCount) {
+            AKLOGE("Variable length argument must be at the end.");
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Prints the usage text of a command to stdout.
+ *
+ * Output layout: a "Usage:" synopsis line (options in brackets, then positional arguments),
+ * the free-form description, one line per option (with its default value when it has one),
+ * and one line per positional argument.
+ *
+ * @param commandName name shown after "Usage:".
+ * @param description text printed below the synopsis line.
+ */
+void ArgumentsParser::printUsage(const std::string &commandName,
+        const std::string &description) const {
+    // --- Synopsis line ---
+    printf("Usage: %s", commandName.c_str());
+    for (const auto &entry : mOptionSpecs) {
+        const OptionSpec &optionSpec = entry.second;
+        printf(" [-%s", entry.first.c_str());
+        if (optionSpec.takeValue()) {
+            printf(" <%s>", optionSpec.getValueName().c_str());
+        }
+        printf("]");
+    }
+    for (const auto &argumentSpec : mArgumentSpecs) {
+        const char *const argumentName = argumentSpec.getName().c_str();
+        const bool isSingle = argumentSpec.getMaxCount() == 1;
+        switch (argumentSpec.getMinCount()) {
+            case 0:
+                // Optional argument, possibly repeatable.
+                if (isSingle) {
+                    printf(" [<%s>]", argumentName);
+                } else {
+                    printf(" [<%s>...]", argumentName);
+                }
+                break;
+            case 1:
+                // Required argument, possibly repeatable.
+                if (isSingle) {
+                    printf(" <%s>", argumentName);
+                } else {
+                    printf(" <%s>...", argumentName);
+                }
+                break;
+            default:
+                // Other minimum counts have no synopsis form and are left out.
+                break;
+        }
+    }
+
+    // --- Description ---
+    printf("\n%s\n\n", description.c_str());
+
+    // --- Per-option details ---
+    for (const auto &entry : mOptionSpecs) {
+        const OptionSpec &optionSpec = entry.second;
+        const bool takesValue = optionSpec.takeValue();
+        printf(" -%s", entry.first.c_str());
+        if (takesValue) {
+            printf(" <%s>", optionSpec.getValueName().c_str());
+        }
+        printf("\t\t\t%s", optionSpec.getDescription().c_str());
+        if (takesValue && !optionSpec.getDefaultValue().empty()) {
+            printf("\tdefault: %s", optionSpec.getDefaultValue().c_str());
+        }
+        printf("\n");
+    }
+
+    // --- Per-argument details ---
+    for (const auto &argumentSpec : mArgumentSpecs) {
+        printf(" <%s>\t\t\t%s\n", argumentSpec.getName().c_str(),
+                argumentSpec.getDescription().c_str());
+    }
+    printf("\n\n");
+}
+
+const ArgumentsAndOptions ArgumentsParser::parseArguments(const int argc, char **argv) const { // Stub: argc/argv are ignored for now.
+ // TODO: Implement
+ return ArgumentsAndOptions(); // Default-constructed result, i.e. isValid() is false.
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/arguments_parser.h b/native/dicttoolkit/src/utils/arguments_parser.h
new file mode 100644
index 000000000..be2dd8749
--- /dev/null
+++ b/native/dicttoolkit/src/utils/arguments_parser.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H
+#define LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+#include "utils/arguments_and_options.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OptionSpec { // Describes one command line option; instances come from the two static factories below.
+ public:
+ // Default constructor and assignment operator is enabled to be used with std::unordered_map.
+ OptionSpec() = default; // Caution: default-initialization (e.g. "OptionSpec s;") leaves mTakeValue indeterminate.
+ OptionSpec &operator=(const OptionSpec &) = default;
+
+ static OptionSpec keyValueOption(const std::string &valueName, const std::string &defaultValue,
+ const std::string &description) { // Option that is followed by a value.
+ return OptionSpec(true /* takeValue */, valueName, defaultValue, description);
+ }
+
+ static OptionSpec switchOption(const std::string &description) { // Option that is a bare switch: no value name, no default.
+ return OptionSpec(false /* takeValue */, "" /* valueName */, "" /* defaultValue */,
+ description);
+ }
+
+ bool takeValue() const { return mTakeValue; }
+ const std::string &getValueName() const { return mValueName; }
+ const std::string &getDefaultValue() const { return mDefaultValue; }
+ const std::string &getDescription() const { return mDescription; }
+
+ private:
+ OptionSpec(const bool takeValue, const std::string &valueName, const std::string &defaultValue,
+ const std::string &description)
+ : mTakeValue(takeValue), mValueName(valueName), mDefaultValue(defaultValue),
+ mDescription(description) {}
+
+ // Whether the option have to be used with a value or just a switch.
+ // e.g. 'f' in "command -f /path/to/file" is mTakeValue == true.
+ // 'f' in "command -f -t" is mTakeValue == false.
+ bool mTakeValue;
+ // Name of the value used to show usage.
+ std::string mValueName;
+ std::string mDefaultValue; // Shown as "default: ..." in the usage text when non-empty.
+ std::string mDescription; // Help text shown next to the option in the usage text.
+};
+
+class ArgumentSpec { // Describes one positional argument: its name, how many values it accepts, and its help text.
+ public:
+ static const int UNLIMITED_COUNT; // Defined as -1 in arguments_parser.cpp.
+
+ static ArgumentSpec singleArgument(const std::string &name, const std::string &description) { // Exactly one value (min == max == 1).
+ return ArgumentSpec(name, 1 /* minCount */, 1 /* maxCount */, description);
+ }
+
+ static ArgumentSpec variableLengthArguments(const std::string &name, const int minCount,
+ const int maxCount, const std::string &description) { // Counts are stored as given; no validation happens here.
+ return ArgumentSpec(name, minCount, maxCount, description);
+ }
+
+ const std::string &getName() const { return mName; }
+ int getMinCount() const { return mMinCount; }
+ int getMaxCount() const { return mMaxCount; }
+ const std::string &getDescription() const { return mDescription; }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentSpec);
+
+ ArgumentSpec(const std::string &name, const int minCount, const int maxCount,
+ const std::string &description)
+ : mName(name), mMinCount(minCount), mMaxCount(maxCount), mDescription(description) {}
+
+ const std::string mName; // Shown as <name> in the usage text.
+ const int mMinCount;
+ const int mMaxCount;
+ const std::string mDescription;
+};
+
+class ArgumentsParser { // Owns the option/argument specs of one command; can print usage text and parse a command line.
+ public:
+ ArgumentsParser(std::unordered_map<std::string, OptionSpec> &&optionSpecs,
+ std::vector<ArgumentSpec> &&argumentSpecs) // Both parameters bind only to non-const rvalues and are moved from.
+ : mOptionSpecs(std::move(optionSpecs)), mArgumentSpecs(std::move(argumentSpecs)) {}
+
+ const ArgumentsAndOptions parseArguments(const int argc, char **argv) const; // Callers should check isValid() on the result.
+ bool validateSpecs() const; // False when a variable length argument is not the last spec.
+ void printUsage(const std::string &commandName, const std::string &description) const; // Writes the usage text to stdout.
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentsParser);
+ DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsParser);
+
+ const std::unordered_map<std::string, OptionSpec> mOptionSpecs; // Option name (without '-') -> spec; iteration order is unspecified.
+ const std::vector<ArgumentSpec> mArgumentSpecs; // Positional arguments in command line order.
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H
diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp
new file mode 100644
index 000000000..34196425e
--- /dev/null
+++ b/native/dicttoolkit/src/utils/command_utils.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/command_utils.h"
+
+#include <cstdio>
+
+#include "command_executors/diff_executor.h"
+#include "command_executors/header_executor.h"
+#include "command_executors/help_executor.h"
+#include "command_executors/info_executor.h"
+#include "command_executors/makedict_executor.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * Maps a command name typed by the user to its CommandType.
+ *
+ * The name is compared against each executor's COMMAND_NAME; any name that matches none of
+ * them yields CommandType::Unknown.
+ */
+/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) {
+    if (commandName == InfoExecutor::COMMAND_NAME) {
+        return CommandType::Info;
+    }
+    if (commandName == DiffExecutor::COMMAND_NAME) {
+        return CommandType::Diff;
+    }
+    if (commandName == MakedictExecutor::COMMAND_NAME) {
+        return CommandType::Makedict;
+    }
+    if (commandName == HeaderExecutor::COMMAND_NAME) {
+        return CommandType::Header;
+    }
+    if (commandName == HelpExecutor::COMMAND_NAME) {
+        return CommandType::Help;
+    }
+    // No executor claims this name.
+    return CommandType::Unknown;
+}
+
+/**
+ * Reports an unrecognized command on stderr and points the user at the help command.
+ *
+ * @param programName name the program was invoked with, used in the suggested help invocation.
+ * @param commandName the command name that was not recognized.
+ */
+/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName,
+        const std::string &commandName) {
+    const char *const unknownCommand = commandName.c_str();
+    const char *const program = programName.c_str();
+    std::fprintf(stderr, "Command '%s' is unknown. Try '%s %s' for more information.\n",
+            unknownCommand, program, HelpExecutor::COMMAND_NAME);
+}
+
+/**
+ * Returns the function that runs the given command.
+ *
+ * @param commandType the command to run.
+ * @return the matching executor's run() function. For CommandType::Unknown (or any value
+ *         without an executor) a fallback is returned that reports the problem on stderr and
+ *         returns 1.
+ */
+/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor(
+        const CommandType commandType) {
+    switch (commandType) {
+        case CommandType::Info:
+            return InfoExecutor::run;
+        case CommandType::Diff:
+            return DiffExecutor::run;
+        case CommandType::Makedict:
+            return MakedictExecutor::run;
+        case CommandType::Header:
+            return HeaderExecutor::run;
+        case CommandType::Help:
+            return HelpExecutor::run;
+        default:
+            return [] (int, char **) -> int {
+                // This is an error report: send it to stderr and terminate the line, like the
+                // other error messages of the toolkit, so it does not mix into regular output.
+                fprintf(stderr, "Command executor not found.\n");
+                return 1;
+            };
+    }
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h
new file mode 100644
index 000000000..4a181f194
--- /dev/null
+++ b/native/dicttoolkit/src/utils/command_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
+#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+enum class CommandType : int { // Sub-commands known to the toolkit.
+ Info,
+ Diff,
+ Makedict,
+ Header,
+ Help,
+ Unknown // What CommandUtils::getCommandType() yields for a name that matches no command.
+};
+
+class CommandUtils { // Static helpers that map a command name to its type and its executor.
+public:
+ static CommandType getCommandType(const std::string &commandName); // CommandType::Unknown when the name matches no command.
+ static void printCommandUnknownMessage(const std::string &programName,
+ const std::string &commandName); // Writes the message to stderr.
+ static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType); // Takes (argc, argv), returns an int status.
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils); // Macro is defined outside this file; every member here is static.
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H
diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp
new file mode 100644
index 000000000..0f349f512
--- /dev/null
+++ b/native/dicttoolkit/src/utils/utf8_utils.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8_utils.h"
+
+#include "utils/char_utils.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// The tables below are indexed by the UTF-8 sequence size in bytes (1 to 4).
+const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4;
+// A first byte belongs to a sequence of size N when (byte & MASKS[N]) == MARKERS[N].
+const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8};
+const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0};
+// Bits of the first byte that carry code point data. The first byte of a 4 byte sequence is
+// 11110xxx, so it carries 3 code point bits (0x07); the highest of them is required to decode
+// U+100000..U+10FFFF.
+const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x07};
+// Largest code point that can be encoded with each sequence size. Entry 0 is -1 so that
+// "the maximum for size - 1" is also defined for 1 byte sequences.
+const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+
+// Second and later bytes have the form 10xxxxxx: a 2 bit marker followed by 6 code point bits.
+const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F;
+const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80;
+const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6;
+
+// Decodes a UTF-8 string into its code points.
+//
+// Returns an empty vector, after logging the reason, when the input is malformed:
+//  - a byte that cannot start a sequence appears where a first byte is expected,
+//  - a second or later byte does not have the 10xxxxxx form,
+//  - a code point is encoded with more bytes than necessary (redundant sequence),
+//  - the decoded value is larger than the maximum for its sequence size,
+//  - the string ends in the middle of a sequence.
+// An empty input also yields an empty vector.
+/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) {
+    std::vector<int> codePoints;
+    int remainingByteCountForCurrentCodePoint = 0;
+    int currentCodePointSequenceSize = 0;
+    int codePoint = 0;
+    for (const char c : utf8Str) {
+        // char may be signed; use the unsigned byte value for masking and for logging.
+        const uint8_t byte = static_cast<uint8_t>(c);
+        if (remainingByteCountForCurrentCodePoint == 0) {
+            currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(byte);
+            if (currentCodePointSequenceSize <= 0) {
+                AKLOGE("%x is an invalid utf8 first byte value.", byte);
+                return std::vector<int>();
+            }
+            remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize;
+            codePoint = maskFirstByte(byte, remainingByteCountForCurrentCodePoint);
+        } else {
+            // The bits that are not code point bits must be exactly the trailing byte marker.
+            const uint8_t markerBitsMask =
+                    static_cast<uint8_t>(~TRAILING_BYTE_CODE_POINT_BITS_MASK);
+            if ((byte & markerBitsMask) != TRAILING_BYTE_MARKER) {
+                AKLOGE("%x is an invalid utf8 trailing byte value.", byte);
+                return std::vector<int>();
+            }
+            codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+            codePoint += maskTrailingByte(byte);
+        }
+        remainingByteCountForCurrentCodePoint--;
+        if (remainingByteCountForCurrentCodePoint == 0) {
+            // A value that fits in a shorter sequence must not use a longer one.
+            if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) {
+                AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.",
+                        currentCodePointSequenceSize, codePoint);
+                return std::vector<int>();
+            }
+            if (codePoint > MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize]) {
+                AKLOGE("codePoint(%x) is larger than the maximum value for a %d bytes sequence.",
+                        codePoint, currentCodePointSequenceSize);
+                return std::vector<int>();
+            }
+            codePoints.push_back(codePoint);
+        }
+    }
+    if (remainingByteCountForCurrentCodePoint != 0) {
+        // The input stopped before the last sequence was complete.
+        AKLOGE("The utf8 string ends in the middle of a %d bytes sequence.",
+                currentCodePointSequenceSize);
+        return std::vector<int>();
+    }
+    return codePoints;
+}
+
+// Returns the size in bytes (1 to MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) of the sequence that
+// starts with firstByte, or -1 when firstByte cannot start a sequence.
+/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) {
+    size_t sequenceSize = 1;
+    while (sequenceSize <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) {
+        const uint8_t markerBits = firstByte & FIRST_BYTE_MARKER_MASKS[sequenceSize];
+        if (markerBits == FIRST_BYTE_MARKERS[sequenceSize]) {
+            return sequenceSize;
+        }
+        ++sequenceSize;
+    }
+    // No marker matched: not a valid utf8 char first byte.
+    return -1;
+}
+
+// Extracts the code point bits carried by the first byte of a sequence of the given size.
+/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte,
+        const int sequenceSize) {
+    const uint8_t codePointBitsMask = FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize];
+    return firstByte & codePointBitsMask;
+}
+
+// Extracts the code point bits carried by a second or later byte of a sequence.
+/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) {
+    const int codePointBits = secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK;
+    return codePointBits;
+}
+
+// Encodes the given code points as a UTF-8 string. Returns an empty string, after logging, as
+// soon as one code point cannot be encoded.
+/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) {
+    std::string encoded;
+    for (const int codePoint : codePoints) {
+        const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint);
+        if (sequenceSize <= 0) {
+            AKLOGE("Cannot encode code point (%d).", codePoint);
+            return std::string();
+        }
+        // First byte: size marker plus the most significant code point bits.
+        int pendingTrailingBytes = sequenceSize - 1;
+        const int leadingBits =
+                codePoint >> (pendingTrailingBytes * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE);
+        encoded.push_back(static_cast<char>(leadingBits | FIRST_BYTE_MARKERS[sequenceSize]));
+        // Trailing bytes, from the most significant 6 bit group down to the least significant.
+        while (pendingTrailingBytes > 0) {
+            --pendingTrailingBytes;
+            const int groupBits =
+                    (codePoint >> (pendingTrailingBytes * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE))
+                            & TRAILING_BYTE_CODE_POINT_BITS_MASK;
+            encoded.push_back(static_cast<char>(groupBits | TRAILING_BYTE_MARKER));
+        }
+    }
+    return encoded;
+}
+
+// Returns the smallest sequence size in bytes able to encode codePoint, or -1 when codePoint is
+// negative or larger than the biggest value in MAX_ENCODED_CODE_POINT_VALUES.
+/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) {
+    if (codePoint >= 0) {
+        size_t candidateSize = 1;
+        while (candidateSize <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT) {
+            if (MAX_ENCODED_CODE_POINT_VALUES[candidateSize] >= codePoint) {
+                return candidateSize;
+            }
+            ++candidateSize;
+        }
+    }
+    return -1;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h
new file mode 100644
index 000000000..35818e56c
--- /dev/null
+++ b/native/dicttoolkit/src/utils/utf8_utils.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
+#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "dict_toolkit_defines.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+// Conversions between UTF-8 encoded strings and code point sequences. All members are static.
+class Utf8Utils {
+public:
+ // Decodes utf8Str into code points. Returns an empty vector when an invalid first byte or a
+ // redundant (longer than necessary) sequence is found.
+ static std::vector<int> getCodePoints(const std::string &utf8Str);
+ // Encodes codePoints as UTF-8. Returns an empty string when a code point cannot be encoded.
+ static std::string getUtf8String(const CodePointArrayView codePoints);
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils);
+
+ // Values indexed by sequence size.
+ static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT;
+ static const uint8_t FIRST_BYTE_MARKER_MASKS[];
+ static const uint8_t FIRST_BYTE_MARKERS[];
+ static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[];
+ static const int MAX_ENCODED_CODE_POINT_VALUES[];
+
+ // Layout of the second and later bytes of a sequence.
+ static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK;
+ static const uint8_t TRAILING_BYTE_MARKER;
+ static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE;
+
+ // Returns the sequence size announced by firstByte, or -1 if it is not a valid first byte.
+ static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte);
+ // Returns the code point bits of the first byte of a sequence of encodeSize bytes.
+ static int maskFirstByte(const uint8_t firstByte, const int encodeSize);
+ // Returns the code point bits of a second or later byte.
+ static int maskTrailingByte(const uint8_t secondOrLaterByte);
+ // Returns the sequence size needed to encode codePoint, or -1 if it cannot be encoded.
+ static int getSequenceSizeToEncodeCodePoint(const int codePoint);
+};
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H
diff --git a/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp b/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp
new file mode 100644
index 000000000..444141427
--- /dev/null
+++ b/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/diff_executor.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Checks that validateSpecs() accepts the argument specs of DiffExecutor's arguments parser.
+// NOTE(review): "Arguemnt" in the test name is a typo for "Argument".
+TEST(DiffExecutorTests, TestArguemntSpecs) {
+ EXPECT_TRUE(DiffExecutor::getArgumentsParser().validateSpecs());
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/command_executors/header_executor_test.cpp b/native/dicttoolkit/tests/command_executors/header_executor_test.cpp
new file mode 100644
index 000000000..a94150b01
--- /dev/null
+++ b/native/dicttoolkit/tests/command_executors/header_executor_test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/header_executor.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Checks that validateSpecs() accepts the argument specs of HeaderExecutor's arguments parser.
+// NOTE(review): "Arguemnt" in the test name is a typo for "Argument".
+TEST(HeaderExecutorTests, TestArguemntSpecs) {
+ EXPECT_TRUE(HeaderExecutor::getArgumentsParser().validateSpecs());
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/command_executors/info_executor_test.cpp b/native/dicttoolkit/tests/command_executors/info_executor_test.cpp
new file mode 100644
index 000000000..debe8c601
--- /dev/null
+++ b/native/dicttoolkit/tests/command_executors/info_executor_test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/info_executor.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Checks that validateSpecs() accepts the argument specs of InfoExecutor's arguments parser.
+// NOTE(review): "Arguemnt" in the test name is a typo for "Argument".
+TEST(InfoExecutorTests, TestArguemntSpecs) {
+ EXPECT_TRUE(InfoExecutor::getArgumentsParser().validateSpecs());
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp b/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp
new file mode 100644
index 000000000..44eb3dc1b
--- /dev/null
+++ b/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command_executors/makedict_executor.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Checks that validateSpecs() accepts the argument specs of MakedictExecutor's arguments parser.
+// NOTE(review): "Arguemnt" in the test name is a typo for "Argument".
+TEST(MakedictExecutorTests, TestArguemntSpecs) {
+ EXPECT_TRUE(MakedictExecutor::getArgumentsParser().validateSpecs());
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp b/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp
new file mode 100644
index 000000000..3445bd0c5
--- /dev/null
+++ b/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dict_toolkit_defines.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Initial trivial test case: checks that KEYCODE_SPACE is the ' ' character.
+TEST(DictToolkitDefinesTest, TestKeycodeSpace) {
+ EXPECT_EQ(' ', KEYCODE_SPACE);
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp
new file mode 100644
index 000000000..f2e24ab5f
--- /dev/null
+++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Builds a code point vector from a NUL-terminated string, taking each char as one code point.
+// The tests in this file only pass plain ASCII words.
+const std::vector<int> getCodePointVector(const char *str) {
+    std::vector<int> result;
+    for (const char *cursor = str; *cursor != '\0'; ++cursor) {
+        result.push_back(*cursor);
+    }
+    return result;
+}
+
+// Creates a WordProperty for the given code points with a default-constructed UnigramProperty
+// and no ngrams, for use as test input.
+// NOTE(review): "Dummp" looks like a typo for "Dummy".
+const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) {
+ return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>());
+}
+
+// Adds several words, including words that are prefixes or extensions of one another, and
+// checks that each can be looked up right after insertion and again at the end.
+TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) {
+ OffdeviceIntermediateDict dict = OffdeviceIntermediateDict(
+ OffdeviceIntermediateDictHeader(OffdeviceIntermediateDictHeader::AttributeMap()));
+ // Looking up an empty code point array yields nothing.
+ EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView()));
+
+ const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd"));
+ EXPECT_TRUE(dict.addWord(wordProperty0));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
+
+ // A word sharing no prefix with the first one.
+ const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh"));
+ EXPECT_TRUE(dict.addWord(wordProperty1));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
+
+ // A prefix of an existing word ("ab" of "abcd").
+ const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab"));
+ EXPECT_TRUE(dict.addWord(wordProperty2));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
+
+ // An extension of an existing word ("abcd" + "efg").
+ const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg"));
+ EXPECT_TRUE(dict.addWord(wordProperty3));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
+
+ // A word that shares only the prefix "ef" with "efgh".
+ const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef"));
+ EXPECT_TRUE(dict.addWord(wordProperty4));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
+
+ // The common prefix of "efgh" and "efef".
+ const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef"));
+ EXPECT_TRUE(dict.addWord(wordProperty5));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
+
+ const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd"));
+ EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail.";
+
+ // Every word added successfully must still be found after all the insertions.
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/utils/command_utils_test.cpp b/native/dicttoolkit/tests/utils/command_utils_test.cpp
new file mode 100644
index 000000000..9d79c9dd9
--- /dev/null
+++ b/native/dicttoolkit/tests/utils/command_utils_test.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/command_utils.h"
+
+#include <gtest/gtest.h>
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Checks the command name to CommandType mapping: each known name maps to its own type, and
+// an empty or unrecognized name maps to CommandType::Unknown.
+TEST(CommandUtilsTests, TestGetCommandType) {
+ EXPECT_EQ(CommandUtils::getCommandType(""), CommandType::Unknown);
+ EXPECT_EQ(CommandUtils::getCommandType("abc"), CommandType::Unknown);
+ EXPECT_EQ(CommandUtils::getCommandType("info"), CommandType::Info);
+ EXPECT_EQ(CommandUtils::getCommandType("diff"), CommandType::Diff);
+ EXPECT_EQ(CommandUtils::getCommandType("makedict"), CommandType::Makedict);
+ EXPECT_EQ(CommandUtils::getCommandType("header"), CommandType::Header);
+ EXPECT_EQ(CommandUtils::getCommandType("help"), CommandType::Help);
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/tests/utils/utf8_utils_test.cpp b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp
new file mode 100644
index 000000000..9c59a8b05
--- /dev/null
+++ b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8_utils.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+// Checks decoding of empty, ASCII, 2/3 byte and 4 byte inputs, and rejection of redundant
+// sequences.
+TEST(Utf8UtilsTests, TestGetCodePoints) {
+ {
+ const std::vector<int> codePoints = Utf8Utils::getCodePoints("");
+ EXPECT_EQ(0u, codePoints.size());
+ }
+ {
+ const std::vector<int> codePoints = Utf8Utils::getCodePoints("test");
+ EXPECT_EQ(4u, codePoints.size());
+ EXPECT_EQ('t', codePoints[0]);
+ EXPECT_EQ('e', codePoints[1]);
+ EXPECT_EQ('s', codePoints[2]);
+ EXPECT_EQ('t', codePoints[3]);
+ }
+ {
+ // Mix of 3 byte, 1 byte and 2 byte sequences.
+ const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410");
+ EXPECT_EQ(4u, codePoints.size());
+ EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A
+ EXPECT_EQ('a', codePoints[1]);
+ EXPECT_EQ(0x03C2, codePoints[2]); // GREEK SMALL LETTER FINAL SIGMA
+ EXPECT_EQ(0x0410, codePoints[3]); // CYRILLIC CAPITAL LETTER A
+ }
+ {
+ // 4 byte sequences.
+ const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752");
+ EXPECT_EQ(3u, codePoints.size());
+ EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE
+ EXPECT_EQ('?', codePoints[1]);
+ EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT
+ }
+
+ // Redundant UTF-8 sequences must be rejected.
+ // Each input below encodes U+002F with 2, 3 and 4 bytes respectively.
+ EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty());
+ EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty());
+ EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty());
+}
+
+// Checks encoding of ASCII code points and of code points needing 2, 3 and 4 byte sequences.
+TEST(Utf8UtilsTests, TestGetUtf8String) {
+ {
+ const std::vector<int> codePoints = {'t', 'e', 's', 't'};
+ EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints)));
+ }
+ {
+ const std::vector<int> codePoints = {
+ 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */,
+ 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */,
+ 0x0430 /* CYRILLIC SMALL LETTER A */,
+ 0x3042 /* HIRAGANA LETTER A */,
+ 0x1F36A /* COOKIE */,
+ 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */
+ };
+ EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752",
+ Utf8Utils::getUtf8String(CodePointArrayView(codePoints)));
+ }
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 118f600bb..9c065e0d1 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -35,6 +35,7 @@
#include "utils/int_array_view.h"
#include "utils/jni_data_utils.h"
#include "utils/log_utils.h"
+#include "utils/profiler.h"
#include "utils/time_keeper.h"
namespace latinime {
@@ -43,8 +44,8 @@ class ProximityInfo;
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
- PROF_OPEN;
- PROF_START(66);
+ PROF_INIT;
+ PROF_TIMER_START(66);
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
if (sourceDirUtf8Length <= 0) {
AKLOGE("DICT: Can't get sourceDir string");
@@ -63,8 +64,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
- PROF_END(66);
- PROF_CLOSE;
+ PROF_TIMER_END(66);
return reinterpret_cast<jlong>(dictionary);
}
@@ -586,7 +586,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
}
if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
CodePointArrayView(wordCodePoints, wordCodePointCount),
- wordProperty.getUnigramProperty())) {
+ &wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
@@ -605,7 +605,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
+ for (const NgramProperty &ngramProperty : wordProperty.getNgramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false;
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 885118524..0e67b4d5a 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -23,10 +23,10 @@
#define AK_FORCE_INLINE inline
#endif // __GNUC__
-#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#if defined(FLAG_DBG)
#undef AK_FORCE_INLINE
#define AK_FORCE_INLINE inline
-#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#endif // defined(FLAG_DBG)
// Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java
#define MAX_WORD_LENGTH 48
@@ -172,69 +172,6 @@ static inline void showStackTrace() {
#define INTS_TO_CHARS(input, length, output)
#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
-#ifdef FLAG_DO_PROFILE
-// Profiler
-#include <time.h>
-
-#define PROF_BUF_SIZE 100
-static float profile_buf[PROF_BUF_SIZE];
-static float profile_old[PROF_BUF_SIZE];
-static unsigned int profile_counter[PROF_BUF_SIZE];
-
-#define PROF_RESET prof_reset()
-#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]
-#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0)
-#define PROF_START(prof_buf_id) do { \
- PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0)
-#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0)
-#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
-#define PROF_CLOCKOUT(prof_buf_id) \
- AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
-#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0)
-
-static inline void prof_reset(void) {
- for (int i = 0; i < PROF_BUF_SIZE; ++i) {
- profile_buf[i] = 0;
- profile_old[i] = 0;
- profile_counter[i] = 0;
- }
-}
-
-static inline void prof_out(void) {
- if (profile_counter[PROF_BUF_SIZE - 1] != 1) {
- AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
- }
- AKLOGI("Total time is %6.3f ms.",
- profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC));
- float all = 0.0f;
- for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
- all += profile_buf[i];
- }
- if (all < 1.0f) all = 1.0f;
- for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
- if (profile_buf[i] > 0.0f) {
- AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
- i, (profile_buf[i] * 100.0f / all),
- profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC),
- profile_counter[i]);
- }
- }
-}
-
-#else // FLAG_DO_PROFILE
-#define PROF_BUF_SIZE 0
-#define PROF_RESET
-#define PROF_COUNT(prof_buf_id)
-#define PROF_OPEN
-#define PROF_START(prof_buf_id)
-#define PROF_CLOSE
-#define PROF_END(prof_buf_id)
-#define PROF_CLOCK_OUT(prof_buf_id)
-#define PROF_CLOCKOUT(prof_buf_id)
-#define PROF_OUTALL
-
-#endif // FLAG_DO_PROFILE
-
#ifdef FLAG_DBG
#define DEBUG_DICT true
#define DEBUG_DICT_FULL false
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
index 1e2494e92..8f07ce275 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -31,6 +31,7 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_A_PERFECT_MATCH = NOT_AN_ERROR;
const ErrorTypeUtils::ErrorType
ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
index fd1d5fcff..e92c509fa 100644
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -52,6 +52,10 @@ class ErrorTypeUtils {
return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
}
+ // Returns true when containedErrorTypes has no bit set outside
+ // ERRORS_TREATED_AS_A_PERFECT_MATCH.
+ static bool isPerfectMatch(const ErrorType containedErrorTypes) {
+ return (containedErrorTypes & ~ERRORS_TREATED_AS_A_PERFECT_MATCH) == 0;
+ }
+
static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
return (containedErrorTypes
& ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
@@ -73,6 +77,7 @@ class ErrorTypeUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+ static const ErrorType ERRORS_TREATED_AS_A_PERFECT_MATCH;
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/property/historical_info.h b/native/jni/src/suggest/core/dictionary/property/historical_info.h
index f9bd6fd8c..e5ce1ea25 100644
--- a/native/jni/src/suggest/core/dictionary/property/historical_info.h
+++ b/native/jni/src/suggest/core/dictionary/property/historical_info.h
@@ -38,6 +38,7 @@ class HistoricalInfo {
return mTimestamp;
}
+ // TODO: Remove
int getLevel() const {
return mLevel;
}
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h
index b5314faaa..d4db3f09f 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.h
@@ -23,6 +23,7 @@
#include "jni.h"
#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -33,10 +34,10 @@ class WordProperty {
WordProperty()
: mCodePoints(), mUnigramProperty(), mNgrams() {}
- WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
- const std::vector<NgramProperty> *const ngrams)
- : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
- mNgrams(*ngrams) {}
+ WordProperty(const std::vector<int> &&codePoints, const UnigramProperty &unigramProperty,
+ const std::vector<NgramProperty> &ngrams)
+ : mCodePoints(std::move(codePoints)), mUnigramProperty(unigramProperty),
+ mNgrams(ngrams) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
@@ -44,12 +45,16 @@ class WordProperty {
jobject outNgramProbabilities, jobject outShortcutTargets,
jobject outShortcutProbabilities) const;
- const UnigramProperty *getUnigramProperty() const {
- return &mUnigramProperty;
+ const CodePointArrayView getCodePoints() const {
+ return CodePointArrayView(mCodePoints);
}
- const std::vector<NgramProperty> *getNgramProperties() const {
- return &mNgrams;
+ const UnigramProperty &getUnigramProperty() const {
+ return mUnigramProperty;
+ }
+
+ const std::vector<NgramProperty> &getNgramProperties() const {
+ return mNgrams;
}
private:
diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h
index ce3684a1c..b9dda83ad 100644
--- a/native/jni/src/suggest/core/policy/scoring.h
+++ b/native/jni/src/suggest/core/policy/scoring.h
@@ -30,7 +30,7 @@ class Scoring {
public:
virtual int calculateFinalScore(const float compoundDistance, const int inputSize,
const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
- const bool boostExactMatches) const = 0;
+ const bool boostExactMatches, const bool hasProbabilityZero) const = 0;
virtual void getMostProbableString(const DicTraverseSession *const traverseSession,
const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const = 0;
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
index 3283f6deb..74db95953 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp
@@ -76,6 +76,52 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
weightOfLangModelVsSpatialModelToOutputSuggestions, outSuggestionResults);
}
+/* static */ bool SuggestionsOutputUtils::shouldBlockWord(
+ const SuggestOptions *const suggestOptions, const DicNode *const terminalDicNode,
+ const WordAttributes wordAttributes, const bool isLastWord) {
+ const bool currentWordExactMatch =
+ ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
+ // When we have to block offensive words, non-exact matched offensive words should not be
+ // output.
+ const bool shouldBlockOffensiveWords = suggestOptions->blockOffensiveWords();
+
+ const bool isBlockedOffensiveWord = shouldBlockOffensiveWords &&
+ wordAttributes.isPossiblyOffensive();
+
+ // This function is called in two situations:
+ //
+ // 1) At the end of a search, in which case terminalDicNode will point to the last DicNode
+ // of the search, and isLastWord will be true.
+ // "fuck"
+ // |
+ // \ terminalDicNode (isLastWord=true, currentWordExactMatch=true)
+ // In this case, if the current word is an exact match, we will always let the word
+ // through, even if the user is blocking offensive words (it's exactly what they typed!)
+ //
+ // 2) In the middle of the search, when we hit a terminal node, to decide whether or not
+ // to start a new search at root, to try to match the rest of the input. In this case,
+ // terminalDicNode will point to the terminal node we just hit, and isLastWord will be
+ // false.
+ // "fuckvthis"
+ // |
+ // \ terminalDicNode (isLastWord=false, currentWordExactMatch=true)
+ //
+ // In this case, we should NOT allow the match through (correcting "fuckthis" to "fuck this"
+ // when offensive words are blocked would be a bad idea).
+ //
+ // In the case of a multi-word correction where the offensive word is typed last (e.g.
+ // for the input "allfuck"), this function will be called with isLastWord==true, but
+ // currentWordExactMatch==false. So we are OK in this case as well.
+ // "allfuck"
+ // |
+ // \ terminalDicNode (isLastWord=true, currentWordExactMatch=false)
+ if (isLastWord && currentWordExactMatch) {
+ return false;
+ } else {
+ return isBlockedOffensiveWord;
+ }
+}
+
/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
const DicNode *const terminalDicNode, const float weightOfLangModelVsSpatialModel,
@@ -98,24 +144,16 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const bool isExactMatchWithIntentionalOmission =
ErrorTypeUtils::isExactMatchWithIntentionalOmission(
terminalDicNode->getContainedErrorTypes());
- const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
- // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
- // (e.g. "AMD" and "and")
- const bool isSafeExactMatch = isExactMatch
- && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase);
const int outputTypeFlags =
(wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
- | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
+ | ((isExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
| (isExactMatchWithIntentionalOmission ?
Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
-
// Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
- // When we have to block offensive words, non-exact matched offensive words should not be
- // output.
- const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
- const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive()
- && !isSafeExactMatch;
+
+ const bool shouldBlockThisWord = shouldBlockWord(traverseSession->getSuggestOptions(),
+ terminalDicNode, wordAttributes, true /* isLastWord */);
// Increase output score of top typing suggestion to ensure autocorrection.
// TODO: Better integration with java side autocorrection logic.
@@ -123,11 +161,11 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
compoundDistance, traverseSession->getInputSize(),
terminalDicNode->getContainedErrorTypes(),
(forceCommitMultiWords && terminalDicNode->hasMultipleWords()),
- boostExactMatches);
+ boostExactMatches, wordAttributes.getProbability() == 0);
// Don't output invalid or blocked offensive words. However, we still need to submit their
// shortcuts if any.
- if (isValidWord && !isBlockedOffensiveWord) {
+ if (isValidWord && !shouldBlockThisWord) {
int codePoints[MAX_WORD_LENGTH];
terminalDicNode->outputResult(codePoints);
const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.h b/native/jni/src/suggest/core/result/suggestions_output_utils.h
index bf8497828..eca1f78b2 100644
--- a/native/jni/src/suggest/core/result/suggestions_output_utils.h
+++ b/native/jni/src/suggest/core/result/suggestions_output_utils.h
@@ -18,6 +18,7 @@
#define LATINIME_SUGGESTIONS_OUTPUT_UTILS
#include "defines.h"
+#include "suggest/core/dictionary/word_attributes.h"
namespace latinime {
@@ -25,11 +26,19 @@ class BinaryDictionaryShortcutIterator;
class DicNode;
class DicTraverseSession;
class Scoring;
+class SuggestOptions;
class SuggestionResults;
class SuggestionsOutputUtils {
public:
/**
+ * Returns true if we should block the incoming word, in the context of the user's
+ * preferences to include or not include possibly offensive words.
+ */
+ static bool shouldBlockWord(const SuggestOptions *const suggestOptions,
+ const DicNode *const terminalDicNode, const WordAttributes wordAttributes,
+ const bool isLastWord);
+ /**
* Outputs the final list of suggestions (i.e., terminal nodes).
*/
static void outputSuggestions(const Scoring *const scoringPolicy,
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 68a36454e..e5e9b46bf 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -29,6 +29,7 @@
#include "suggest/core/result/suggestions_output_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest_options.h"
+#include "utils/profiler.h"
namespace latinime {
@@ -48,8 +49,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, const float weightOfLangModelVsSpatialModel,
SuggestionResults *const outSuggestionResults) const {
- PROF_OPEN;
- PROF_START(0);
+ PROF_INIT;
+ PROF_TIMER_START(0);
const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
@@ -57,8 +58,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
// TODO: Add the way to evaluate cache
initializeSearch(tSession);
- PROF_END(0);
- PROF_START(1);
+ PROF_TIMER_END(0);
+ PROF_TIMER_START(1);
// keep expanding search dicNodes until all have terminated.
while (tSession->getDicTraverseCache()->activeSize() > 0) {
@@ -66,12 +67,11 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
tSession->getDicTraverseCache()->advanceActiveDicNodes();
tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
}
- PROF_END(1);
- PROF_START(2);
+ PROF_TIMER_END(1);
+ PROF_TIMER_START(2);
SuggestionsOutputUtils::outputSuggestions(
SCORING, tSession, weightOfLangModelVsSpatialModel, outSuggestionResults);
- PROF_END(2);
- PROF_CLOSE;
+ PROF_TIMER_END(2);
}
/**
@@ -416,6 +416,11 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode
traverseSession->getDictionaryStructurePolicy()->getWordAttributesInContext(
dicNode->getPrevWordIds(), dicNode->getWordId(),
traverseSession->getMultiBigramMap());
+ if (SuggestionsOutputUtils::shouldBlockWord(traverseSession->getSuggestOptions(),
+ dicNode, wordAttributes, false /* isLastWord */)) {
+ return;
+ }
+
if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode, wordAttributes.getProbability())) {
return;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 44c2f443f..7a5acd7d5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -134,15 +134,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
// same so we use them for both here.
switch (mDictFormatVersion) {
case FormatUtils::VERSION_2:
- return FormatUtils::VERSION_2;
case FormatUtils::VERSION_201:
- return FormatUtils::VERSION_201;
+ AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+ return FormatUtils::UNKNOWN_VERSION;
+ case FormatUtils::VERSION_202:
+ return FormatUtils::VERSION_202;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
- case FormatUtils::VERSION_4:
- return FormatUtils::VERSION_4;
- case FormatUtils::VERSION_4_DEV:
- return FormatUtils::VERSION_4_DEV;
+ case FormatUtils::VERSION_402:
+ return FormatUtils::VERSION_402;
+ case FormatUtils::VERSION_403:
+ return FormatUtils::VERSION_403;
default:
return FormatUtils::UNKNOWN_VERSION;
}
@@ -245,7 +247,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
}
bool supportsBeginningOfSentence() const {
- return mDictFormatVersion >= FormatUtils::VERSION_4;
+ return mDictFormatVersion >= FormatUtils::VERSION_402;
}
const int *getCodePointTable() const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 41a8b13b8..19ed0d468 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -111,11 +111,12 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
switch (version) {
case FormatUtils::VERSION_2:
case FormatUtils::VERSION_201:
- // Version 2 or 201 dictionary writing is not supported.
+ case FormatUtils::VERSION_202:
+ // None of the static dictionaries (v2x) support writing
return false;
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
- case FormatUtils::VERSION_4:
- case FormatUtils::VERSION_4_DEV:
+ case FormatUtils::VERSION_402:
+ case FormatUtils::VERSION_403:
return buffer->writeUintAndAdvancePosition(version /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
index 9e1adff70..15ac88319 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp
@@ -65,6 +65,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
if (mHasHistoricalInfo) {
+ // Hack for better migration.
+ count += level;
const HistoricalInfo historicalInfo(timestamp, level, count);
return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
index ef6166ffd..61ef4aa42 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp
@@ -50,7 +50,8 @@ const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int ter
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
const int count = buffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
- const HistoricalInfo historicalInfo(timestamp, level, count);
+ // Hack for better migration.
+ const HistoricalInfo historicalInfo(timestamp, level, count + level);
return ProbabilityEntry(flags, probability, &historicalInfo);
} else {
return ProbabilityEntry(flags, probability);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 08e39ce43..ca7d93b0e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -140,7 +140,7 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
const PtNodeParams &ptNodeParams) const {
- return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
+ return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
ptNodeParams.getProbability() == 0);
}
@@ -164,7 +164,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI
}
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
+ if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
if (prevWordIds.empty()) {
@@ -614,7 +614,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts));
- return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
+ return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index 372c9e36f..9a9a21b6b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -58,7 +58,7 @@ namespace latinime {
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) {
FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion);
switch (dictFormatVersion) {
- case FormatUtils::VERSION_4: {
+ case FormatUtils::VERSION_402: {
return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants,
backward::v402::Ver4DictBuffers,
backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
@@ -66,7 +66,7 @@ namespace latinime {
dictFormatVersion, locale, attributeMap);
}
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
- case FormatUtils::VERSION_4_DEV: {
+ case FormatUtils::VERSION_403: {
return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers,
Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
dictFormatVersion, locale, attributeMap);
@@ -115,9 +115,10 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
switch (formatVersion) {
case FormatUtils::VERSION_2:
case FormatUtils::VERSION_201:
- AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
+ case FormatUtils::VERSION_202:
+ AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path);
break;
- case FormatUtils::VERSION_4: {
+ case FormatUtils::VERSION_402: {
return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
backward::v402::Ver4DictBuffers,
backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr,
@@ -125,7 +126,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
headerFilePath, formatVersion, std::move(mmappedBuffer));
}
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
- case FormatUtils::VERSION_4_DEV: {
+ case FormatUtils::VERSION_403: {
return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers,
Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>(
headerFilePath, formatVersion, std::move(mmappedBuffer));
@@ -177,11 +178,14 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
case FormatUtils::VERSION_2:
case FormatUtils::VERSION_201:
+ AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+ break;
+ case FormatUtils::VERSION_202:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
- case FormatUtils::VERSION_4:
- case FormatUtils::VERSION_4_DEV:
+ case FormatUtils::VERSION_402:
+ case FormatUtils::VERSION_403:
AKLOGE("Given path is a file but the format is version 4. path: %s", path);
break;
default:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 585e87a24..e52706e07 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -144,17 +144,6 @@ class PtNodeParams {
return PatriciaTrieReadingUtils::isTerminal(mFlags);
}
- AK_FORCE_INLINE bool isBlacklisted() const {
- // Note: this method will be removed in the next change.
- // It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
- // * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
- // when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
- // * getWordAttributes will be changed to always return blacklisted=false and
- // isPossiblyOffensive according to the function below (instead of the current
- // behaviour of checking if the probability is zero)
- return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
- }
-
AK_FORCE_INLINE bool isPossiblyOffensive() const {
return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 66fd18a52..1a51acad5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -14,7 +14,6 @@
* limitations under the License.
*/
-
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "defines.h"
@@ -317,8 +316,8 @@ const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(
const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
const PtNodeParams &ptNodeParams) const {
- return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
- ptNodeParams.getProbability() == 0);
+ return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
+ ptNodeParams.isPossiblyOffensive());
}
int PatriciaTriePolicy::getProbability(const int unigramProbability,
@@ -345,10 +344,9 @@ int PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams =
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
- if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
- // If this is not a word, or if it's a blacklisted entry, it should behave as
- // having no probability outside of the suggestion process (where it should be used
- // for shortcuts).
+ if (ptNodeParams.isNotAWord()) {
+ // If this is not a word, it should behave as having no probability outside of the
+ // suggestion process (where it should be used for shortcuts).
return NOT_A_PROBABILITY;
}
if (!prevWordIds.empty()) {
@@ -480,7 +478,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts));
- return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
+ return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
}
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index f4d340f86..9c4ab18e4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -105,7 +105,7 @@ class ProbabilityEntry {
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
| static_cast<uint8_t>(mHistoricalInfo.getLevel());
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
- | static_cast<uint8_t>(mHistoricalInfo.getCount());
+ | static_cast<uint16_t>(mHistoricalInfo.getCount());
} else {
encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
| static_cast<uint8_t>(mProbability);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index eb6080a24..bd89b8da7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -49,8 +49,8 @@ const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
-const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
-const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 0;
+const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 2;
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 600b5ffe4..13d7a5714 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -47,6 +47,7 @@ class Ver4DictConstants {
static const int NOT_A_TERMINAL_ADDRESS;
static const int TERMINAL_ID_FIELD_SIZE;
static const int TIME_STAMP_FIELD_SIZE;
+ // TODO: Remove
static const int WORD_LEVEL_FIELD_SIZE;
static const int WORD_COUNT_FIELD_SIZE;
// Flags in probability entry.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 96d789f58..7449cd02b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -146,8 +146,16 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI
if (!probabilityEntry.isValid()) {
continue;
}
- const int probability = probabilityEntry.hasHistoricalInfo() ?
- 0 : probabilityEntry.getProbability();
+ int probability = NOT_A_PROBABILITY;
+ if (probabilityEntry.hasHistoricalInfo()) {
+ // TODO: Quit checking count here.
+ // If count <= 1, the word can be an invalid word. The actual probability should
+ // be checked using getWordAttributesInContext() in onVisitEntry().
+ probability = probabilityEntry.getHistoricalInfo()->getCount() <= 1 ?
+ NOT_A_PROBABILITY : 0;
+ } else {
+ probability = probabilityEntry.getProbability();
+ }
listener->onVisitEntry(probability, entry.getWordId());
}
}
@@ -552,7 +560,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(),
wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(),
*historicalInfo, std::move(shortcuts));
- return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
+ return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 9d8e86675..edcb43678 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -44,13 +44,13 @@ const int DictFileWritingUtils::SIZE_OF_BUFFER_SIZE_FIELD = 4;
TimeKeeper::setCurrentTime();
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion);
switch (formatVersion) {
- case FormatUtils::VERSION_4:
+ case FormatUtils::VERSION_402:
return createEmptyV4DictFile<backward::v402::Ver4DictConstants,
backward::v402::Ver4DictBuffers,
backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>(
filePath, localeAsCodePointVector, attributeMap, formatVersion);
case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
- case FormatUtils::VERSION_4_DEV:
+ case FormatUtils::VERSION_403:
return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers,
Ver4DictBuffers::Ver4DictBuffersPtr>(
filePath, localeAsCodePointVector, attributeMap, formatVersion);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 0cffe569d..e225c235e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -28,15 +28,17 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
switch (formatVersion) {
case VERSION_2:
- return VERSION_2;
case VERSION_201:
- return VERSION_201;
+ AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+ return UNKNOWN_VERSION;
+ case VERSION_202:
+ return VERSION_202;
case VERSION_4_ONLY_FOR_TESTING:
return VERSION_4_ONLY_FOR_TESTING;
- case VERSION_4:
- return VERSION_4;
- case VERSION_4_DEV:
- return VERSION_4_DEV;
+ case VERSION_402:
+ return VERSION_402;
+ case VERSION_403:
+ return VERSION_403;
default:
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 96310086b..1616efcce 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,11 +31,15 @@ class FormatUtils {
public:
enum FORMAT_VERSION {
// These MUST have the same values as the relevant constants in FormatSpec.java.
+ // TODO: Remove VERSION_2 and VERSION_201 once:
+ // * We have confirmed that old versions of LatinIME download old-format dictionaries
+ // * We no longer need the corresponding constants on the Java side for dicttool
VERSION_2 = 2,
VERSION_201 = 201,
+ VERSION_202 = 202,
VERSION_4_ONLY_FOR_TESTING = 399,
- VERSION_4 = 402,
- VERSION_4_DEV = 403,
+ VERSION_402 = 402,
+ VERSION_403 = 403,
UNKNOWN_VERSION = -1
};
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index a6f9a8b23..856808a74 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -24,6 +24,7 @@ const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120;
const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f;
const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f;
+const float ScoringParams::PERFECT_MATCH_PROMOTION = 1.1f;
const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f;
const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f;
const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
index b8f889559..6f327a370 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h
@@ -34,6 +34,7 @@ class ScoringParams {
static const int THRESHOLD_SHORT_WORD_LENGTH;
static const float EXACT_MATCH_PROMOTION;
+ static const float PERFECT_MATCH_PROMOTION;
static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
index 0240bcf54..6acd767ea 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -44,23 +44,50 @@ class TypingScoring : public Scoring {
AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const int inputSize,
const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit,
- const bool boostExactMatches) const {
+ const bool boostExactMatches, const bool hasProbabilityZero) const {
const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE
+ static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance;
if (forceCommit) {
score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD;
}
- if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
- score += ScoringParams::EXACT_MATCH_PROMOTION;
- if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
- score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ if (hasProbabilityZero) {
+ // Previously, when both legitimate 0-frequency words (such as distracters) and
+ // offensive words were encoded in the same way, distracters would never show up
+ // when the user blocked offensive words (the default setting, as well as the
+ // setting for regression tests).
+ //
+ // When b/11031090 was fixed and a separate encoding was used for offensive words,
+ // 0-frequency words would no longer be blocked when they were an "exact match"
+ // (where case mismatches and accent mismatches would be considered an "exact
+ // match"). The exact match boosting functionality meant that, for example, when
+ // the user typed "mt" they would be suggested the word "Mt", although they most
+ // probably meant to type "my".
+ //
+ // For this reason, we introduced this change, which does the following:
+ // * Defines the "perfect match" as a really exact match, with no room for case or
+ // accent mismatches
+ // * When the target word has probability zero (as "Mt" does, because it is a
+ // distracter), ONLY boost its score if it is a perfect match.
+ //
+ // By doing this, when the user types "mt", the word "Mt" will NOT be boosted, and
+ // they will get "my". However, if the user makes an explicit effort to type "Mt",
+ // we do boost the word "Mt" so that the user's input is not autocorrected to "My".
+ if (boostExactMatches && ErrorTypeUtils::isPerfectMatch(containedErrorTypes)) {
+ score += ScoringParams::PERFECT_MATCH_PROMOTION;
}
- if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
- score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
- }
- if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
- score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+ } else {
+ if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
+ score += ScoringParams::EXACT_MATCH_PROMOTION;
+ if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
+ score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
+ score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
+ }
+ if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
+ score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH;
+ }
}
}
return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE);
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
index 408373176..e0f671056 100644
--- a/native/jni/src/utils/int_array_view.h
+++ b/native/jni/src/utils/int_array_view.h
@@ -133,6 +133,29 @@ class IntArrayView {
return std::vector<int>(begin(), end());
}
+    // Splits this view into sub-views at occurrences of |separator|. The separators themselves
+    // are not part of any returned view, and adjacent separators produce empty views.
+    //
+    // |limit| caps the number of returned views:
+    //   * limit <= 0: an empty vector is returned.
+    //   * limit == 1: a single view covering the whole array is returned.
+    //   * otherwise:  at most |limit| views are returned; once limit - 1 separators have been
+    //                 consumed, the last view holds the untouched remainder of the array,
+    //                 including any further separators.
+    std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const {
+        std::vector<IntArrayView> pieces;
+        if (limit <= 0) {
+            return pieces;
+        }
+        // Number of separators that may be consumed before the remainder is emitted as-is.
+        const size_t maxSeparatorCount = static_cast<size_t>(limit - 1);
+        size_t pieceStart = 0;
+        for (size_t pos = 0; pos < mSize && pieces.size() < maxSeparatorCount; ++pos) {
+            if (mPtr[pos] != separator) {
+                continue;
+            }
+            pieces.emplace_back(mPtr + pieceStart, pos - pieceStart);
+            pieceStart = pos + 1;
+        }
+        // Everything after the last consumed separator (possibly nothing) is the final piece.
+        pieces.emplace_back(mPtr + pieceStart, mSize - pieceStart);
+        return pieces;
+    }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h
new file mode 100644
index 000000000..5f107fed3
--- /dev/null
+++ b/native/jni/src/utils/profiler.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROFILER_H
+#define LATINIME_PROFILER_H
+
+#ifdef FLAG_DO_PROFILE
+
+#include "defines.h"
+
+#include <ctime>
+#include <unordered_map>
+
+namespace latinime {
+
+/**
+ * Accumulates elapsed time and call counts per integer timer id.
+ *
+ * The lifetime of the instance is the profiled period: the clock is sampled on construction and
+ * again on destruction, at which point the total time and the per-id statistics are logged with
+ * AKLOGI. All times are read from the clock id passed to the constructor.
+ */
+class Profiler final {
+ public:
+    // Samples |clockId| immediately; that sample is the start of the profiled period.
+    explicit Profiler(const clockid_t clockId)
+            : mClockId(clockId), mStartTime(getTimeInMicroSec()), mStartTimes(), mTimes(),
+              mCounters() {}
+
+    // Logs the total profiled time in milliseconds followed by one line per timer id.
+    ~Profiler() {
+        const float totalTime =
+                static_cast<float>(getTimeInMicroSec() - mStartTime) / 1000.f;
+        AKLOGI("Total time is %6.3f ms.", totalTime);
+        for (const auto &time : mTimes) {
+            // Guard the ratio: the clock may not have advanced during the object's lifetime.
+            const float usedPercent =
+                    (totalTime > 0.0f) ? time.second / totalTime * 100.0f : 0.0f;
+            const auto counter = mCounters.find(time.first);
+            const int callCount = (counter == mCounters.end()) ? 0 : counter->second;
+            AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", time.first,
+                    usedPercent, time.second, callCount);
+        }
+    }
+
+    // Remembers the current time as the start of the interval for |id|, replacing any
+    // previously remembered start for the same id.
+    void startTimer(const int id) {
+        mStartTimes[id] = getTimeInMicroSec();
+    }
+
+    // Adds the time elapsed since the last startTimer(id), in milliseconds, to the total for
+    // |id| and increments its call count.
+    void endTimer(const int id) {
+        const int64_t now = getTimeInMicroSec();
+        const auto startTime = mStartTimes.find(id);
+        if (startTime == mStartTimes.end()) {
+            // No startTimer(id) was ever issued, so there is no interval to measure. Recording
+            // one against a default start time of 0 would add a meaningless, huge duration.
+            return;
+        }
+        mTimes[id] += static_cast<float>(now - startTime->second) / 1000.0f;
+        mCounters[id]++;
+    }
+
+    // Always converts to false.
+    // NOTE(review): the intended use of this conversion is not visible here - confirm callers.
+    operator bool() const { return false; }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler);
+
+    const clockid_t mClockId;
+    // Time at construction, in microseconds.
+    int64_t mStartTime;
+    // Timer id -> time of the most recent startTimer() call, in microseconds.
+    std::unordered_map<int, int64_t> mStartTimes;
+    // Timer id -> accumulated elapsed time, in milliseconds.
+    std::unordered_map<int, float> mTimes;
+    // Timer id -> number of completed startTimer()/endTimer() intervals.
+    std::unordered_map<int, int> mCounters;
+
+    // Reads mClockId and returns its value in microseconds, or 0 if the clock cannot be read.
+    int64_t getTimeInMicroSec() const {
+        timespec time = {};
+        if (clock_gettime(mClockId, &time) != 0) {
+            return 0;
+        }
+        return static_cast<int64_t>(time.tv_sec) * 1000000
+                + static_cast<int64_t>(time.tv_nsec) / 1000;
+    }
+};
+} // namespace latinime
+
+// Declares the profiler instance that PROF_TIMER_START and PROF_TIMER_END operate on; it reads
+// CLOCK_THREAD_CPUTIME_ID. "Profiler" is unqualified here, so the macro has to be expanded
+// where latinime::Profiler is reachable under that name.
+// NOTE(review): identifiers containing a double underscore are reserved for the implementation
+// in C++; consider renaming __LATINIME__PROFILER__.
+#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID)
+// Calls startTimer(timer_id) on the instance declared by PROF_INIT, which must be in scope.
+#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id)
+// Calls endTimer(timer_id) on the instance declared by PROF_INIT, which must be in scope.
+#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id)
+
+#else // FLAG_DO_PROFILE
+
+// Profiling is disabled: every PROF_* macro expands to nothing.
+#define PROF_INIT
+#define PROF_TIMER_START(timer_id)
+#define PROF_TIMER_END(timer_id)
+
+#endif // FLAG_DO_PROFILE
+
+#endif /* LATINIME_PROFILER_H */
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
index 86040f12c..313a9af10 100644
--- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
@@ -52,16 +52,14 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
const int flag = 0xF0;
const int timestamp = 0x3FFFFFFF;
- const int level = 3;
const int count = 10;
const int wordId = 100;
- const HistoricalInfo historicalInfo(timestamp, level, count);
+ const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
const ProbabilityEntry probabilityEntry(flag, &historicalInfo);
languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId);
EXPECT_EQ(flag, entry.getFlags());
EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimestamp());
- EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());
// Remove
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
index 260b347ce..eb78034ba 100644
--- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
@@ -39,20 +39,18 @@ TEST(ProbabilityEntryTest, TestEncodeDecode) {
TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) {
const int flag = 0xF0;
const int timestamp = 0x3FFFFFFF;
- const int level = 3;
- const int count = 10;
+ const int count = 0xABCD;
- const HistoricalInfo historicalInfo(timestamp, level, count);
+ const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
const ProbabilityEntry entry(flag, &historicalInfo);
const uint64_t encodedEntry = entry.encode(true /* hasHistoricalInfo */);
- EXPECT_EQ(0xF03FFFFFFF030Aull, encodedEntry);
+ EXPECT_EQ(0xF03FFFFFFFABCDull, encodedEntry);
const ProbabilityEntry decodedEntry =
ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
EXPECT_EQ(flag, decodedEntry.getFlags());
EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimestamp());
- EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
}
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp
index 15f560cd1..494200568 100644
--- a/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp
+++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp
@@ -62,14 +62,14 @@ TEST(FormatUtilsTest, TestDetectFormatVersion) {
}
{
const std::vector<uint8_t> buffer =
- getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4, 0, 0);
- EXPECT_EQ(FormatUtils::VERSION_4, FormatUtils::detectFormatVersion(
+ getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_402, 0, 0);
+ EXPECT_EQ(FormatUtils::VERSION_402, FormatUtils::detectFormatVersion(
ReadOnlyByteArrayView(buffer.data(), buffer.size())));
}
{
const std::vector<uint8_t> buffer =
- getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4_DEV, 0, 0);
- EXPECT_EQ(FormatUtils::VERSION_4_DEV, FormatUtils::detectFormatVersion(
+ getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_403, 0, 0);
+ EXPECT_EQ(FormatUtils::VERSION_403, FormatUtils::detectFormatVersion(
ReadOnlyByteArrayView(buffer.data(), buffer.size())));
}
diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp
index 4757a416b..2fce633f5 100644
--- a/native/jni/tests/utils/int_array_view_test.cpp
+++ b/native/jni/tests/utils/int_array_view_test.cpp
@@ -151,5 +151,52 @@ TEST(IntArrayViewTest, TestToVector) {
EXPECT_EQ(std::vector<int>(), CodePointArrayView().toVector());
}
+// Exercises IntArrayView::split() on empty and non-empty views, with and without a limit.
+TEST(IntArrayViewTest, TestSplit) {
+    // A limit of zero yields no pieces at all, even for an empty view.
+    EXPECT_TRUE(IntArrayView().split(0, 0).empty());
+    {
+        // An empty view with a limit of one yields exactly one empty piece.
+        const auto pieces = IntArrayView().split(0, 1);
+        EXPECT_EQ(1u, pieces.size());
+        EXPECT_TRUE(pieces[0].empty());
+    }
+    {
+        // A larger limit does not change the result for an empty view.
+        const auto pieces = IntArrayView().split(0, 100);
+        EXPECT_EQ(1u, pieces.size());
+        EXPECT_TRUE(pieces[0].empty());
+    }
+
+    const std::vector<int> source = {1, 2, 3, 3, 2, 3};
+    const IntArrayView sourceView(source);
+    {
+        // Default limit: split at every occurrence of the separator.
+        const auto pieces = sourceView.split(2);
+        EXPECT_EQ(3u, pieces.size());
+        EXPECT_EQ(std::vector<int>({1}), pieces[0].toVector());
+        EXPECT_EQ(std::vector<int>({3, 3}), pieces[1].toVector());
+        EXPECT_EQ(std::vector<int>({3}), pieces[2].toVector());
+    }
+    {
+        // Limit of two: only the first separator is consumed, the rest stays in the last piece.
+        const auto pieces = sourceView.split(2, 2);
+        EXPECT_EQ(2u, pieces.size());
+        EXPECT_EQ(std::vector<int>({1}), pieces[0].toVector());
+        EXPECT_EQ(std::vector<int>({3, 3, 2, 3}), pieces[1].toVector());
+    }
+    {
+        // Limit of one: the whole array comes back as a single piece.
+        const auto pieces = sourceView.split(2, 1);
+        EXPECT_EQ(1u, pieces.size());
+        EXPECT_EQ(source, pieces[0].toVector());
+    }
+    {
+        // Limit of zero: nothing is returned.
+        const auto pieces = sourceView.split(2, 0);
+        EXPECT_EQ(0u, pieces.size());
+    }
+    {
+        // Adjacent and trailing separators produce empty pieces.
+        const auto pieces = sourceView.split(3);
+        EXPECT_EQ(4u, pieces.size());
+        EXPECT_EQ(std::vector<int>({1, 2}), pieces[0].toVector());
+        EXPECT_EQ(std::vector<int>(), pieces[1].toVector());
+        EXPECT_EQ(std::vector<int>({2}), pieces[2].toVector());
+        EXPECT_EQ(std::vector<int>(), pieces[3].toVector());
+    }
+}
+
} // namespace
} // namespace latinime