diff options
Diffstat (limited to 'native')
71 files changed, 2392 insertions, 180 deletions
diff --git a/native/dicttoolkit/Android.mk b/native/dicttoolkit/Android.mk new file mode 100644 index 000000000..118682dfc --- /dev/null +++ b/native/dicttoolkit/Android.mk @@ -0,0 +1,67 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ifeq (,$(TARGET_BUILD_APPS)) + +# Only build if it's explicitly requested, or running mm/mmm. +ifneq ($(ONE_SHOT_MAKEFILE)$(filter $(MAKECMDGOALS),dicttoolkit),) + +# HACK: Temporarily disable host tool build on Mac until the build system is ready for C++11. +LATINIME_HOST_OSNAME := $(shell uname -s) +ifneq ($(LATINIME_HOST_OSNAME), Darwin) # TODO: Remove this + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LATIN_IME_CORE_PATH := $(LOCAL_PATH)/../jni + +LATIN_IME_DICT_TOOLKIT_SRC_DIR := src +LATIN_IME_CORE_SRC_DIR := ../jni/src + +LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \ + -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls \ + -Woverloaded-virtual -Wsign-promo -Wno-system-headers + +# To suppress compiler warnings for unused variables/functions used for debug features etc. +LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function +LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function + +include $(LOCAL_PATH)/NativeFileList.mk +include $(LATIN_IME_CORE_PATH)/NativeFileList.mk + +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \ + $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR) + +LOCAL_SRC_FILES := $(LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES) \ + $(addprefix $(LATIN_IME_DICT_TOOLKIT_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_SRC_FILES)) \ + $(addprefix $(LATIN_IME_CORE_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES)) + +LOCAL_MODULE := dicttoolkit +LOCAL_MODULE_TAGS := optional + +LOCAL_CLANG := true +LOCAL_CXX_STL := libc++ + +include $(BUILD_HOST_EXECUTABLE) +#################### Clean up the tmp vars +include $(LOCAL_PATH)/CleanupNativeFileList.mk +#################### Unit test +include $(LOCAL_PATH)/UnitTests.mk + +endif # Darwin - TODO: Remove this + +endif + +endif # TARGET_BUILD_APPS diff --git a/native/dicttoolkit/CleanupNativeFileList.mk b/native/dicttoolkit/CleanupNativeFileList.mk new file mode 100644 index 000000000..b804b41ed --- /dev/null +++ b/native/dicttoolkit/CleanupNativeFileList.mk @@ -0,0 +1,17 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := +LATIN_IME_DICT_TOOLKIT_SRC_FILES := +LATIN_IME_DICT_TOOLKIT_TEST_FILES := diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk new file mode 100644 index 000000000..d2c8c3a2c --- /dev/null +++ b/native/dicttoolkit/NativeFileList.mk @@ -0,0 +1,43 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := \ + dict_toolkit_main.cpp + +LATIN_IME_DICT_TOOLKIT_SRC_FILES := \ + $(addprefix command_executors/, \ + diff_executor.cpp \ + header_executor.cpp \ + help_executor.cpp \ + info_executor.cpp \ + makedict_executor.cpp) \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict.cpp) \ + $(addprefix utils/, \ + arguments_parser.cpp \ + command_utils.cpp \ + utf8_utils.cpp) + +LATIN_IME_DICT_TOOLKIT_TEST_FILES := \ + $(addprefix command_executors/, \ + diff_executor_test.cpp \ + header_executor_test.cpp \ + info_executor_test.cpp \ + makedict_executor_test.cpp) \ + dict_toolkit_defines_test.cpp \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict_test.cpp) \ + $(addprefix utils/, \ + command_utils_test.cpp \ + utf8_utils_test.cpp) diff --git a/native/dicttoolkit/UnitTests.mk b/native/dicttoolkit/UnitTests.mk new file mode 100644 index 000000000..96e28730e --- /dev/null +++ b/native/dicttoolkit/UnitTests.mk @@ -0,0 +1,69 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ifeq (,$(TARGET_BUILD_APPS)) + +LOCAL_PATH := $(call my-dir) + +###################################### +include $(CLEAR_VARS) + +LATIN_IME_CORE_PATH := $(LOCAL_PATH)/../jni + +LATIN_IME_DICT_TOOLKIT_SRC_DIR := src +LATIN_IME_CORE_SRC_DIR := ../jni/src +LATIN_DICT_TOOLKIT_TEST_SRC_DIR := tests + +include $(LOCAL_PATH)/NativeFileList.mk +include $(LATIN_IME_CORE_PATH)/NativeFileList.mk + +# TODO: Remove -std=c++11 once it is set by default on host build. +LATIN_IME_SRC_DIR := src +LOCAL_ADDRESS_SANITIZER := true +LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function +LOCAL_CLANG := true +LOCAL_CXX_STL := libc++ +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \ + $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR) +LOCAL_MODULE := liblatinime_dicttoolkit_host_static_for_unittests +LOCAL_MODULE_TAGS := optional +LOCAL_SRC_FILES := \ + $(addprefix $(LATIN_IME_DICT_TOOLKIT_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_SRC_FILES)) \ + $(addprefix $(LATIN_IME_CORE_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES)) +include $(BUILD_HOST_STATIC_LIBRARY) + +include $(CLEAR_VARS) + +# TODO: Remove -std=c++11 once it is set by default on host build. +LOCAL_ADDRESS_SANITIZER := true +LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function +LOCAL_CLANG := true +LOCAL_CXX_STL := libc++ +LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_DICT_TOOLKIT_SRC_DIR) \ + $(LATIN_IME_CORE_PATH)/$(LATIN_IME_CORE_SRC_DIR) +LOCAL_MODULE := dicttoolkit_unittests +LOCAL_MODULE_TAGS := tests +LOCAL_SRC_FILES := \ + $(addprefix $(LATIN_DICT_TOOLKIT_TEST_SRC_DIR)/, $(LATIN_IME_DICT_TOOLKIT_TEST_FILES)) +LOCAL_STATIC_LIBRARIES += liblatinime_dicttoolkit_host_static_for_unittests +include $(BUILD_HOST_NATIVE_TEST) + +include $(LOCAL_PATH)/CleanupNativeFileList.mk + +#################### Clean up the tmp vars +LATINIME_HOST_OSNAME := +LATIN_IME_SRC_DIR := +LATIN_IME_TEST_SRC_DIR := + +endif # TARGET_BUILD_APPS diff --git a/native/dicttoolkit/dict_toolkit_main.cpp b/native/dicttoolkit/dict_toolkit_main.cpp new file mode 100644 index 000000000..53cc5e915 --- /dev/null +++ b/native/dicttoolkit/dict_toolkit_main.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cstdio> + +#include "dict_toolkit_defines.h" +#include "utils/command_utils.h" + +void usage(int argc, char **argv) { + fprintf(stderr, "Usage: %s <command> [arguments]\n", argc > 0 ? argv[0] : "dicttoolkit"); +} + +int main(int argc, char **argv) { + if (argc < MIN_ARG_COUNT) { + usage(argc, argv); + return 1; + } + using namespace latinime::dicttoolkit; + const CommandType commandType = CommandUtils::getCommandType(argv[1]); + if (commandType == CommandType::Unknown) { + CommandUtils::printCommandUnknownMessage(argv[0], argv[1]); + return 1; + } + const auto executor = CommandUtils::getCommandExecutor(commandType); + return executor(argc - 1, argv + 1); +} diff --git a/native/dicttoolkit/run_tests.sh b/native/dicttoolkit/run_tests.sh new file mode 100755 index 000000000..44c99c144 --- /dev/null +++ b/native/dicttoolkit/run_tests.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2014, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# check script arguments +if [[ $(type -t mmm) != function ]]; then +if [[ ${BASH_SOURCE[0]} != $0 ]]; then return; else exit 1; fi +fi + +# Host build is never supported in unbundled (NDK/tapas) build +if [[ -n $TARGET_BUILD_APPS ]]; then + echo "Host build is never supported in tapas build." 1>&2 + echo "Use lunch command instead." 1>&2 + if [[ ${BASH_SOURCE[0]} != $0 ]]; then return; else exit 1; fi +fi + +test_name=dicttoolkit_unittests + +pushd $PWD > /dev/null +cd $(gettop) +(mmm -j16 packages/inputmethods/LatinIME/native/dicttoolkit) || (make -j16 $test_name) +$ANDROID_HOST_OUT/bin/$test_name +popd > /dev/null diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp new file mode 100644 index 000000000..bf6830686 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/diff_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const DiffExecutor::COMMAND_NAME = "diff"; + +/* static */ int DiffExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void DiffExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + getArgumentsParser().printUsage(COMMAND_NAME, "Shows differences between two dictionaries."); +} + +/* static */ const ArgumentsParser DiffExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("dict1", "dictionary file"), + ArgumentSpec::singleArgument("dict2", "dictionary file") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h new file mode 100644 index 000000000..f92ae49d5 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/diff_executor.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H + +#include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +class DiffExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + static const ArgumentsParser getArgumentsParser(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp new file mode 100644 index 000000000..b3d273b4e --- /dev/null +++ b/native/dicttoolkit/src/command_executors/header_executor.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/header_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const HeaderExecutor::COMMAND_NAME = "header"; + +/* static */ int HeaderExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void HeaderExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + getArgumentsParser().printUsage(COMMAND_NAME, + "Prints the header contents of a dictionary file."); +} + +/* static */ const ArgumentsParser HeaderExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("dict", "prints the header contents of a dictionary file") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); +} +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h new file mode 100644 index 000000000..44cc9cfc4 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/header_executor.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H + +#include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +class HeaderExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + static const ArgumentsParser getArgumentsParser(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp new file mode 100644 index 000000000..bd29a5b16 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/help_executor.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/help_executor.h" + +#include <cstdio> +#include <functional> +#include <vector> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" +#include "utils/command_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const char *const HelpExecutor::COMMAND_NAME = "help"; + +/* static */ int HelpExecutor::run(const int argc, char **argv) { + printf("Available commands:\n\n"); + const std::vector<std::function<void(void)>> printUsageMethods = {DiffExecutor::printUsage, + HeaderExecutor::printUsage, InfoExecutor::printUsage, MakedictExecutor::printUsage, + printUsage}; + for (const auto &printUsageMethod : printUsageMethods) { + printUsageMethod(); + } + return 0; +} + +/* static */ void HelpExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Show this help list.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h new file mode 100644 index 000000000..280610eb9 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/help_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class HelpExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp new file mode 100644 index 000000000..351da4aff --- /dev/null +++ b/native/dicttoolkit/src/command_executors/info_executor.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/info_executor.h" + +#include <cstdio> +#include <string> +#include <unordered_map> +#include <vector> + +namespace latinime { +namespace dicttoolkit { + +const char *const InfoExecutor::COMMAND_NAME = "info"; + +/* static */ int InfoExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void InfoExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + getArgumentsParser().printUsage(COMMAND_NAME, + "Prints various information about a dictionary file."); +} + +/* static */const ArgumentsParser InfoExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["p"] = OptionSpec::switchOption("(plumbing) produce output suitable for a script"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("dict", "dictionary file name"), + ArgumentSpec::variableLengthArguments("word", 0 /* minCount */, + ArgumentSpec::UNLIMITED_COUNT, "word to show information") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h new file mode 100644 index 000000000..d4106d59f --- /dev/null +++ b/native/dicttoolkit/src/command_executors/info_executor.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H + +#include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +class InfoExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + static const ArgumentsParser getArgumentsParser(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor); +}; + +} // namepsace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp new file mode 100644 index 000000000..8a84e8069 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/makedict_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const MakedictExecutor::COMMAND_NAME = "makedict"; + +/* static */ int MakedictExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void MakedictExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + getArgumentsParser().printUsage(COMMAND_NAME, + "Converts a source dictionary file to one or several outputs.\n" + "Source can be a binary dictionary file or a combined format file.\n" + "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported."); +} + +/* static */const ArgumentsParser MakedictExecutor::getArgumentsParser() { + std::unordered_map<std::string, OptionSpec> optionSpecs; + optionSpecs["o"] = OptionSpec::keyValueOption("format", "2", + "output format version: 2/4/combined"); + optionSpecs["t"] = OptionSpec::keyValueOption("mode", "off", + "code point table switch: on/off/auto"); + + const std::vector<ArgumentSpec> argumentSpecs = { + ArgumentSpec::singleArgument("src_dict", "source dictionary file"), + ArgumentSpec::singleArgument("dest_dict", "output dictionary file") + }; + + return ArgumentsParser(std::move(optionSpecs), std::move(argumentSpecs)); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h new file mode 100644 index 000000000..c3de977a3 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/makedict_executor.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H + +#include "dict_toolkit_defines.h" +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +class MakedictExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + static const ArgumentsParser getArgumentsParser(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h new file mode 100644 index 000000000..dbaae0ca0 --- /dev/null +++ b/native/dicttoolkit/src/dict_toolkit_defines.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_DEFINES_H +#define LATINIME_DICT_TOOLKIT_DEFINES_H + +#include "defines.h" + +#define MIN_ARG_COUNT 2 + +#endif // LATINIME_DICT_TOOLKIT_DEFINES_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp new file mode 100644 index 000000000..af28131cf --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h" + +namespace latinime { +namespace dicttoolkit { + +bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) { + const CodePointArrayView codePoints = wordProperty.getCodePoints(); + if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) { + return false; + } + return addWordInner(codePoints, wordProperty, mRootPtNodeArray); +} + +bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints, + const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { + auto ptNodeList = ptNodeArray.getMutablePtNodeList(); + auto ptNodeIt = ptNodeList->begin(); + for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { + const auto &ptNode = *ptNodeIt; + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[0] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[0] > ptNodeCodePoints[0]) { + break; + } + size_t i = 1; + for (; i < codePoints.size(); ++i) { + if (i >= ptNodeCodePoints.size()) { + // Add new child. + return addWordInner(codePoints.skip(i), wordProperty, + ptNode->getChildrenPtNodeArray()); + } + if (codePoints[i] != ptNodeCodePoints[i]) { + break; + } + } + if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) { + // All code points matched. + if (ptNode->getWordProperty()) { + // Adding the same word multiple times is not supported. + return false; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode)); + ptNodeList->erase(ptNodeIt); + return true; + } + // The (i+1)-th elements are different. + // Create and Add new parent ptNode for the common part. + auto newPtNode = codePoints.size() == i + ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty) + : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i)); + ptNodeList->insert(ptNodeIt, newPtNode); + OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray = + newPtNode->getChildrenPtNodeArray(); + // Add new child for the existing ptNode. + childrenPtNodeArray.getMutablePtNodeList()->push_back( + std::make_shared<OffdeviceIntermediateDictPtNode>( + ptNodeCodePoints.skip(i), *ptNode)); + ptNodeList->erase(ptNodeIt); + if (codePoints.size() != i) { + // Add a child for the new word. + return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray); + } + return true; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)); + return true; +} + +const WordProperty *OffdeviceIntermediateDict::getWordProperty( + const CodePointArrayView codePoints) const { + const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray; + for (size_t i = 0; i < codePoints.size();) { + bool foundNext = false; + for (const auto ptNode : ptNodeArray->getPtNodeList()) { + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[i] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[i] > ptNodeCodePoints[0] + || codePoints.size() < ptNodeCodePoints.size()) { + return nullptr; + } + for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) { + if (codePoints[i + j] != ptNodeCodePoints[j]) { + return nullptr; + } + } + i += ptNodeCodePoints.size(); + if (i == codePoints.size()) { + return ptNode->getWordProperty(); + } + ptNodeArray = &ptNode->getChildrenPtNodeArray(); + foundNext = true; + break; + } + if (!foundNext) { + break; + } + } + return nullptr; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h new file mode 100644 index 000000000..13d26ba91 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +/** + * On memory patricia trie to represent a dictionary. + */ +class OffdeviceIntermediateDict final { + public: + OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header) + : mHeader(header), mRootPtNodeArray() {} + + bool addWord(const WordProperty &wordProperty); + // The returned value will be invalid after modifying the dictionary. e.g. calling addWord(). + const WordProperty *getWordProperty(const CodePointArrayView codePoints) const; + const OffdeviceIntermediateDictHeader &getHeader() const { return mHeader; } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict); + + const OffdeviceIntermediateDictHeader mHeader; + OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray; + + bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty, + OffdeviceIntermediateDictPtNodeArray &ptNodeArray); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h new file mode 100644 index 000000000..440627a79 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H + +#include <map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictHeader final { + public: + using AttributeMap = std::map<std::vector<int>, std::vector<int>>; + + OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap) + : mAttributeMap(attributesMap) {} + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader); + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader); + + const AttributeMap mAttributeMap; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h new file mode 100644 index 000000000..721ccd778 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H + +#include <memory> + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode final { + public: + // Non-terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(nullptr) {} + + // Terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const WordProperty &wordProperty) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(new WordProperty(wordProperty)) {} + + // Replacing PtNodeCodePoints. + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(*ptNode.mWortProperty)) {} + + // Replacing WordProperty. + OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNode.mPtNodeCodePoints), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(wordProperty)) {} + + const WordProperty *getWordProperty() const { + return mWortProperty.get(); + } + + const CodePointArrayView getPtNodeCodePoints() const { + return CodePointArrayView(mPtNodeCodePoints); + } + + OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() { + return mChildrenPtNodeArray; + } + + private: + DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode); + + const std::vector<int> mPtNodeCodePoints; + OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray; + const std::unique_ptr<WordProperty> mWortProperty; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h new file mode 100644 index 000000000..f87456ce0 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H + +#include <list> +#include <memory> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode; + +class OffdeviceIntermediateDictPtNodeArray final { + public: + const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const { + return mPtNodes; + } + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() { + return &mPtNodes; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray); + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H diff --git a/native/dicttoolkit/src/utils/arguments_and_options.h b/native/dicttoolkit/src/utils/arguments_and_options.h new file mode 100644 index 000000000..d8f5985e5 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_and_options.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class ArgumentsAndOptions { + public: + ArgumentsAndOptions() : mIsValid(false), mOptions(), mArguments() {} + + ArgumentsAndOptions(std::unordered_map<std::string, std::string> &&options, + std::unordered_map<std::string, std::vector<std::string>> &&arguments) + : mIsValid(true), mOptions(std::move(options)), mArguments(std::move(arguments)) {} + + bool isValid() const { + return mIsValid; + } + + bool hasOption(const std::string &optionName) const { + return mOptions.find(optionName) != mOptions.end(); + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsAndOptions); + + const bool mIsValid; + const std::unordered_map<std::string, std::string> mOptions; + const std::unordered_map<std::string, std::vector<std::string>> mArguments; +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_AND_OPTIONS_H diff --git a/native/dicttoolkit/src/utils/arguments_parser.cpp b/native/dicttoolkit/src/utils/arguments_parser.cpp new file mode 100644 index 000000000..039dae35b --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/arguments_parser.h" + +namespace latinime { +namespace dicttoolkit { + +const int ArgumentSpec::UNLIMITED_COUNT = -1; + +bool ArgumentsParser::validateSpecs() const { + for (size_t i = 0; i < mArgumentSpecs.size() ; ++i) { + if (mArgumentSpecs[i].getMinCount() != mArgumentSpecs[i].getMaxCount() + && i != mArgumentSpecs.size() - 1) { + AKLOGE("Variable length argument must be at the end."); + return false; + } + } + return true; +} + +void ArgumentsParser::printUsage(const std::string &commandName, + const std::string &description) const { + printf("Usage: %s", commandName.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" [-%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("]"); + } + for (const auto &argSpec : mArgumentSpecs) { + if (argSpec.getMinCount() == 0 && argSpec.getMaxCount() == 1) { + printf(" [<%s>]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1 && argSpec.getMaxCount() == 1) { + printf(" <%s>", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 0) { + printf(" [<%s>...]", argSpec.getName().c_str()); + } else if (argSpec.getMinCount() == 1) { + printf(" <%s>...", argSpec.getName().c_str()); + } + } + printf("\n%s\n\n", description.c_str()); + for (const auto &option : mOptionSpecs) { + const std::string &optionName = option.first; + const OptionSpec &spec = option.second; + printf(" -%s", optionName.c_str()); + if (spec.takeValue()) { + printf(" <%s>", spec.getValueName().c_str()); + } + printf("\t\t\t%s", spec.getDescription().c_str()); + if (spec.takeValue() && !spec.getDefaultValue().empty()) { + printf("\tdefault: %s", spec.getDefaultValue().c_str()); + } + printf("\n"); + } + for (const auto &argSpec : mArgumentSpecs) { + printf(" <%s>\t\t\t%s\n", argSpec.getName().c_str(), argSpec.getDescription().c_str()); + } + printf("\n\n"); +} + +const ArgumentsAndOptions ArgumentsParser::parseArguments(const int argc, char **argv) const { + // TODO: Implement + return ArgumentsAndOptions(); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/arguments_parser.h b/native/dicttoolkit/src/utils/arguments_parser.h new file mode 100644 index 000000000..be2dd8749 --- /dev/null +++ b/native/dicttoolkit/src/utils/arguments_parser.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H +#define LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H + +#include <string> +#include <unordered_map> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/arguments_and_options.h" + +namespace latinime { +namespace dicttoolkit { + +class OptionSpec { + public: + // Default constructor and assignment operator is enabled to be used with std::unordered_map. + OptionSpec() = default; + OptionSpec &operator=(const OptionSpec &) = default; + + static OptionSpec keyValueOption(const std::string &valueName, const std::string &defaultValue, + const std::string &description) { + return OptionSpec(true /* takeValue */, valueName, defaultValue, description); + } + + static OptionSpec switchOption(const std::string &description) { + return OptionSpec(false /* takeValue */, "" /* valueName */, "" /* defaultValue */, + description); + } + + bool takeValue() const { return mTakeValue; } + const std::string &getValueName() const { return mValueName; } + const std::string &getDefaultValue() const { return mDefaultValue; } + const std::string &getDescription() const { return mDescription; } + + private: + OptionSpec(const bool takeValue, const std::string &valueName, const std::string &defaultValue, + const std::string &description) + : mTakeValue(takeValue), mValueName(valueName), mDefaultValue(defaultValue), + mDescription(description) {} + + // Whether the option have to be used with a value or just a switch. + // e.g. 'f' in "command -f /path/to/file" is mTakeValue == true. + // 'f' in "command -f -t" is mTakeValue == false. + bool mTakeValue; + // Name of the value used to show usage. + std::string mValueName; + std::string mDefaultValue; + std::string mDescription; +}; + +class ArgumentSpec { + public: + static const int UNLIMITED_COUNT; + + static ArgumentSpec singleArgument(const std::string &name, const std::string &description) { + return ArgumentSpec(name, 1 /* minCount */, 1 /* maxCount */, description); + } + + static ArgumentSpec variableLengthArguments(const std::string &name, const int minCount, + const int maxCount, const std::string &description) { + return ArgumentSpec(name, minCount, maxCount, description); + } + + const std::string &getName() const { return mName; } + int getMinCount() const { return mMinCount; } + int getMaxCount() const { return mMaxCount; } + const std::string &getDescription() const { return mDescription; } + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentSpec); + + ArgumentSpec(const std::string &name, const int minCount, const int maxCount, + const std::string &description) + : mName(name), mMinCount(minCount), mMaxCount(maxCount), mDescription(description) {} + + const std::string mName; + const int mMinCount; + const int mMaxCount; + const std::string mDescription; +}; + +class ArgumentsParser { + public: + ArgumentsParser(std::unordered_map<std::string, OptionSpec> &&optionSpecs, + std::vector<ArgumentSpec> &&argumentSpecs) + : mOptionSpecs(std::move(optionSpecs)), mArgumentSpecs(std::move(argumentSpecs)) {} + + const ArgumentsAndOptions parseArguments(const int argc, char **argv) const; + bool validateSpecs() const; + void printUsage(const std::string &commandName, const std::string &description) const; + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(ArgumentsParser); + DISALLOW_ASSIGNMENT_OPERATOR(ArgumentsParser); + + const std::unordered_map<std::string, OptionSpec> mOptionSpecs; + const std::vector<ArgumentSpec> mArgumentSpecs; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_ARGUMENTS_PARSER_H diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp new file mode 100644 index 000000000..34196425e --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <cstdio> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/help_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" + +namespace latinime { +namespace dicttoolkit { + +/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) { + if (commandName == InfoExecutor::COMMAND_NAME) { + return CommandType::Info; + } else if (commandName == DiffExecutor::COMMAND_NAME) { + return CommandType::Diff; + } else if (commandName == MakedictExecutor::COMMAND_NAME) { + return CommandType::Makedict; + } else if (commandName == HeaderExecutor::COMMAND_NAME) { + return CommandType::Header; + } else if (commandName == HelpExecutor::COMMAND_NAME) { + return CommandType::Help; + } else { + return CommandType::Unknown; + } +} + +/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName, + const std::string &commandName) { + fprintf(stderr, "Command '%s' is unknown. Try '%s %s' for more information.\n", + commandName.c_str(), programName.c_str(), HelpExecutor::COMMAND_NAME); +} + +/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor( + const CommandType commandType) { + switch (commandType) { + case CommandType::Info: + return InfoExecutor::run; + case CommandType::Diff: + return DiffExecutor::run; + case CommandType::Makedict: + return MakedictExecutor::run; + case CommandType::Header: + return HeaderExecutor::run; + case CommandType::Help: + return HelpExecutor::run; + default: + return [] (int, char **) -> int { + printf("Command executor not found."); + return 1; + }; + } +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h new file mode 100644 index 000000000..4a181f194 --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H +#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H + +#include <functional> +#include <memory> +#include <string> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +enum class CommandType : int { + Info, + Diff, + Makedict, + Header, + Help, + Unknown +}; + +class CommandUtils { +public: + static CommandType getCommandType(const std::string &commandName); + static void printCommandUnknownMessage(const std::string &programName, + const std::string &commandName); + static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp new file mode 100644 index 000000000..0f349f512 --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include "utils/char_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4; +const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8}; +const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0}; +const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x03}; +const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; + +const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F; +const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80; +const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6; + +/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) { + std::vector<int> codePoints; + int remainingByteCountForCurrentCodePoint = 0; + int currentCodePointSequenceSize = 0; + int codePoint = 0; + for (const char c : utf8Str) { + if (remainingByteCountForCurrentCodePoint == 0) { + currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(c); + if (currentCodePointSequenceSize <= 0) { + AKLOGE("%x is an invalid utf8 first byte value.", c); + return std::vector<int>(); + } + remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize; + codePoint = maskFirstByte(c, remainingByteCountForCurrentCodePoint); + } else { + codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + codePoint += maskTrailingByte(c); + } + remainingByteCountForCurrentCodePoint--; + if (remainingByteCountForCurrentCodePoint == 0) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) { + AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.", + currentCodePointSequenceSize, codePoint); + return std::vector<int>(); + } + codePoints.push_back(codePoint); + } + } + return codePoints; +} + +/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) { + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if ((firstByte & FIRST_BYTE_MARKER_MASKS[i]) == FIRST_BYTE_MARKERS[i]) { + return i; + } + } + // Not a valid utf8 char first byte. + return -1; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte, + const int sequenceSize) { + return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize]; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) { + return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK; +} + +/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) { + std::string utf8String; + for (const int codePoint : codePoints) { + const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint); + if (sequenceSize <= 0) { + AKLOGE("Cannot encode code point (%d).", codePoint); + return std::string(); + } + const int trailingByteCount = sequenceSize - 1; + // Output first byte. + const int value = codePoint >> (trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE); + utf8String.push_back(static_cast<char>(value | FIRST_BYTE_MARKERS[sequenceSize])); + // Output second and later bytes. + for (int i = 1; i < sequenceSize; ++i) { + const int shiftAmount = (trailingByteCount - i) * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + const int value = (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK; + utf8String.push_back(static_cast<char>(value | TRAILING_BYTE_MARKER)); + } + } + return utf8String; +} + +/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) { + if (codePoint < 0) { + return -1; + } + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[i]) { + return i; + } + } + return -1; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h new file mode 100644 index 000000000..35818e56c --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H +#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H + +#include <cstdint> +#include <string> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class Utf8Utils { +public: + static std::vector<int> getCodePoints(const std::string &utf8Str); + static std::string getUtf8String(const CodePointArrayView codePoints); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils); + + // Values indexed by sequence size. + static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; + static const uint8_t FIRST_BYTE_MARKER_MASKS[]; + static const uint8_t FIRST_BYTE_MARKERS[]; + static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[]; + static const int MAX_ENCODED_CODE_POINT_VALUES[]; + + static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK; + static const uint8_t TRAILING_BYTE_MARKER; + static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + + static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte); + static int maskFirstByte(const uint8_t firstByte, const int encodeSize); + static int maskTrailingByte(const uint8_t secondOrLaterByte); + static int getSequenceSizeToEncodeCodePoint(const int codePoint); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H diff --git a/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp b/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp new file mode 100644 index 000000000..444141427 --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/diff_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/diff_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(DiffExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(DiffExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/command_executors/header_executor_test.cpp b/native/dicttoolkit/tests/command_executors/header_executor_test.cpp new file mode 100644 index 000000000..a94150b01 --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/header_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/header_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(HeaderExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(HeaderExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/command_executors/info_executor_test.cpp b/native/dicttoolkit/tests/command_executors/info_executor_test.cpp new file mode 100644 index 000000000..debe8c601 --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/info_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/info_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(InfoExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(InfoExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp b/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp new file mode 100644 index 000000000..44eb3dc1b --- /dev/null +++ b/native/dicttoolkit/tests/command_executors/makedict_executor_test.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/makedict_executor.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(MakedictExecutorTests, TestArguemntSpecs) { + EXPECT_TRUE(MakedictExecutor::getArgumentsParser().validateSpecs()); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp b/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp new file mode 100644 index 000000000..3445bd0c5 --- /dev/null +++ b/native/dicttoolkit/tests/dict_toolkit_defines_test.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dict_toolkit_defines.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +// Initial trivial test case. +TEST(DictToolkitDefinesTest, TestKeycodeSpace) { + EXPECT_EQ(' ', KEYCODE_SPACE); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp new file mode 100644 index 000000000..f2e24ab5f --- /dev/null +++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +const std::vector<int> getCodePointVector(const char *str) { + std::vector<int> codePoints; + while (*str) { + codePoints.push_back(*str); + ++str; + } + return codePoints; +} + +const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) { + return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>()); +} + +TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) { + OffdeviceIntermediateDict dict = OffdeviceIntermediateDict( + OffdeviceIntermediateDictHeader(OffdeviceIntermediateDictHeader::AttributeMap())); + EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView())); + + const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_TRUE(dict.addWord(wordProperty0)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + + const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh")); + EXPECT_TRUE(dict.addWord(wordProperty1)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + + const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab")); + EXPECT_TRUE(dict.addWord(wordProperty2)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + + const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg")); + EXPECT_TRUE(dict.addWord(wordProperty3)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + + const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef")); + EXPECT_TRUE(dict.addWord(wordProperty4)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + + const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef")); + EXPECT_TRUE(dict.addWord(wordProperty5)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); + + const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail."; + + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/command_utils_test.cpp b/native/dicttoolkit/tests/utils/command_utils_test.cpp new file mode 100644 index 000000000..9d79c9dd9 --- /dev/null +++ b/native/dicttoolkit/tests/utils/command_utils_test.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(CommandUtilsTests, TestGetCommandType) { + EXPECT_EQ(CommandUtils::getCommandType(""), CommandType::Unknown); + EXPECT_EQ(CommandUtils::getCommandType("abc"), CommandType::Unknown); + EXPECT_EQ(CommandUtils::getCommandType("info"), CommandType::Info); + EXPECT_EQ(CommandUtils::getCommandType("diff"), CommandType::Diff); + EXPECT_EQ(CommandUtils::getCommandType("makedict"), CommandType::Makedict); + EXPECT_EQ(CommandUtils::getCommandType("header"), CommandType::Header); + EXPECT_EQ(CommandUtils::getCommandType("help"), CommandType::Help); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/utf8_utils_test.cpp b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp new file mode 100644 index 000000000..9c59a8b05 --- /dev/null +++ b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(Utf8UtilsTests, TestGetCodePoints) { + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(""); + EXPECT_EQ(0u, codePoints.size()); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints("test"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ('t', codePoints[0]); + EXPECT_EQ('e', codePoints[1]); + EXPECT_EQ('s', codePoints[2]); + EXPECT_EQ('t', codePoints[3]); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A + EXPECT_EQ('a', codePoints[1]); + EXPECT_EQ(0x03C2, codePoints[2]); // CYRILLIC CAPITAL LETTER A + EXPECT_EQ(0x0410, codePoints[3]); // GREEK SMALL LETTER FINAL SIGMA + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752"); + EXPECT_EQ(3u, codePoints.size()); + EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE + EXPECT_EQ('?', codePoints[1]); + EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT + } + + // Redundant UTF-8 sequences must be rejected. + EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty()); +} + +TEST(Utf8UtilsTests, TestGetUtf8String) { + { + const std::vector<int> codePoints = {'t', 'e', 's', 't'}; + EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } + { + const std::vector<int> codePoints = { + 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */, + 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */, + 0x0430 /* CYRILLIC SMALL LETTER A */, + 0x3042 /* HIRAGANA LETTER A */, + 0x1F36A /* COOKIE */, + 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */ + }; + EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752", + Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 118f600bb..9c065e0d1 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -35,6 +35,7 @@ #include "utils/int_array_view.h" #include "utils/jni_data_utils.h" #include "utils/log_utils.h" +#include "utils/profiler.h" #include "utils/time_keeper.h" namespace latinime { @@ -43,8 +44,8 @@ class ProximityInfo; static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { - PROF_OPEN; - PROF_START(66); + PROF_INIT; + PROF_TIMER_START(66); const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); if (sourceDirUtf8Length <= 0) { AKLOGE("DICT: Can't get sourceDir string"); @@ -63,8 +64,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s Dictionary *const dictionary = new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); - PROF_END(66); - PROF_CLOSE; + PROF_TIMER_END(66); return reinterpret_cast<jlong>(dictionary); } @@ -586,7 +586,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j } if (!dictionaryStructureWithBufferPolicy->addUnigramEntry( CodePointArrayView(wordCodePoints, wordCodePointCount), - wordProperty.getUnigramProperty())) { + &wordProperty.getUnigramProperty())) { LogUtils::logToJava(env, "Cannot add unigram to the new dict."); return false; } @@ -605,7 +605,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) { + for (const NgramProperty &ngramProperty : wordProperty.getNgramProperties()) { if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) { LogUtils::logToJava(env, "Cannot add ngram to the new dict."); return false; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 885118524..0e67b4d5a 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -23,10 +23,10 @@ #define AK_FORCE_INLINE inline #endif // __GNUC__ -#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#if defined(FLAG_DBG) #undef AK_FORCE_INLINE #define AK_FORCE_INLINE inline -#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#endif // defined(FLAG_DBG) // Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java #define MAX_WORD_LENGTH 48 @@ -172,69 +172,6 @@ static inline void showStackTrace() { #define INTS_TO_CHARS(input, length, output) #endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) -#ifdef FLAG_DO_PROFILE -// Profiler -#include <time.h> - -#define PROF_BUF_SIZE 100 -static float profile_buf[PROF_BUF_SIZE]; -static float profile_old[PROF_BUF_SIZE]; -static unsigned int profile_counter[PROF_BUF_SIZE]; - -#define PROF_RESET prof_reset() -#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] -#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0) -#define PROF_START(prof_buf_id) do { \ - PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0) -#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0) -#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) -#define PROF_CLOCKOUT(prof_buf_id) \ - AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) -#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0) - -static inline void prof_reset(void) { - for (int i = 0; i < PROF_BUF_SIZE; ++i) { - profile_buf[i] = 0; - profile_old[i] = 0; - profile_counter[i] = 0; - } -} - -static inline void prof_out(void) { - if (profile_counter[PROF_BUF_SIZE - 1] != 1) { - AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); - } - AKLOGI("Total time is %6.3f ms.", - profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); - float all = 0.0f; - for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - all += profile_buf[i]; - } - if (all < 1.0f) all = 1.0f; - for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - if (profile_buf[i] > 0.0f) { - AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", - i, (profile_buf[i] * 100.0f / all), - profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), - profile_counter[i]); - } - } -} - -#else // FLAG_DO_PROFILE -#define PROF_BUF_SIZE 0 -#define PROF_RESET -#define PROF_COUNT(prof_buf_id) -#define PROF_OPEN -#define PROF_START(prof_buf_id) -#define PROF_CLOSE -#define PROF_END(prof_buf_id) -#define PROF_CLOCK_OUT(prof_buf_id) -#define PROF_CLOCKOUT(prof_buf_id) -#define PROF_OUTALL - -#endif // FLAG_DO_PROFILE - #ifdef FLAG_DBG #define DEBUG_DICT true #define DEBUG_DICT_FULL false diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp index 1e2494e92..8f07ce275 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp @@ -31,6 +31,7 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100; const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_A_PERFECT_MATCH = NOT_AN_ERROR; const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h index fd1d5fcff..e92c509fa 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.h +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h @@ -52,6 +52,10 @@ class ErrorTypeUtils { return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0; } + static bool isPerfectMatch(const ErrorType containedErrorTypes) { + return (containedErrorTypes & ~ERRORS_TREATED_AS_A_PERFECT_MATCH) == 0; + } + static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) { return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0; @@ -73,6 +77,7 @@ class ErrorTypeUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils); static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH; + static const ErrorType ERRORS_TREATED_AS_A_PERFECT_MATCH; static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/property/historical_info.h b/native/jni/src/suggest/core/dictionary/property/historical_info.h index f9bd6fd8c..e5ce1ea25 100644 --- a/native/jni/src/suggest/core/dictionary/property/historical_info.h +++ b/native/jni/src/suggest/core/dictionary/property/historical_info.h @@ -38,6 +38,7 @@ class HistoricalInfo { return mTimestamp; } + // TODO: Remove int getLevel() const { return mLevel; } diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h index b5314faaa..d4db3f09f 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.h +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -23,6 +23,7 @@ #include "jni.h" #include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" +#include "utils/int_array_view.h" namespace latinime { @@ -33,10 +34,10 @@ class WordProperty { WordProperty() : mCodePoints(), mUnigramProperty(), mNgrams() {} - WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, - const std::vector<NgramProperty> *const ngrams) - : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), - mNgrams(*ngrams) {} + WordProperty(const std::vector<int> &&codePoints, const UnigramProperty &unigramProperty, + const std::vector<NgramProperty> &ngrams) + : mCodePoints(std::move(codePoints)), mUnigramProperty(unigramProperty), + mNgrams(ngrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, @@ -44,12 +45,16 @@ class WordProperty { jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; - const UnigramProperty *getUnigramProperty() const { - return &mUnigramProperty; + const CodePointArrayView getCodePoints() const { + return CodePointArrayView(mCodePoints); } - const std::vector<NgramProperty> *getNgramProperties() const { - return &mNgrams; + const UnigramProperty &getUnigramProperty() const { + return mUnigramProperty; + } + + const std::vector<NgramProperty> &getNgramProperties() const { + return mNgrams; } private: diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h index ce3684a1c..b9dda83ad 100644 --- a/native/jni/src/suggest/core/policy/scoring.h +++ b/native/jni/src/suggest/core/policy/scoring.h @@ -30,7 +30,7 @@ class Scoring { public: virtual int calculateFinalScore(const float compoundDistance, const int inputSize, const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit, - const bool boostExactMatches) const = 0; + const bool boostExactMatches, const bool hasProbabilityZero) const = 0; virtual void getMostProbableString(const DicTraverseSession *const traverseSession, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const = 0; diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp index 3283f6deb..74db95953 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp @@ -76,6 +76,52 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; weightOfLangModelVsSpatialModelToOutputSuggestions, outSuggestionResults); } +/* static */ bool SuggestionsOutputUtils::shouldBlockWord( + const SuggestOptions *const suggestOptions, const DicNode *const terminalDicNode, + const WordAttributes wordAttributes, const bool isLastWord) { + const bool currentWordExactMatch = + ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); + // When we have to block offensive words, non-exact matched offensive words should not be + // output. + const bool shouldBlockOffensiveWords = suggestOptions->blockOffensiveWords(); + + const bool isBlockedOffensiveWord = shouldBlockOffensiveWords && + wordAttributes.isPossiblyOffensive(); + + // This function is called in two situations: + // + // 1) At the end of a search, in which case terminalDicNode will point to the last DicNode + // of the search, and isLastWord will be true. + // "fuck" + // | + // \ terminalDicNode (isLastWord=true, currentWordExactMatch=true) + // In this case, if the current word is an exact match, we will always let the word + // through, even if the user is blocking offensive words (it's exactly what they typed!) + // + // 2) In the middle of the search, when we hit a terminal node, to decide whether or not + // to start a new search at root, to try to match the rest of the input. In this case, + // terminalDicNode will point to the terminal node we just hit, and isLastWord will be + // false. + // "fuckvthis" + // | + // \ terminalDicNode (isLastWord=false, currentWordExactMatch=true) + // + // In this case, we should NOT allow the match through (correcting "fuckthis" to "fuck this" + // when offensive words are blocked would be a bad idea). + // + // In the case of a multi-word correction where the offensive word is typed last (eg. + // for the input "allfuck"), this function will be called with isLastWord==true, but + // currentWordExactMatch==false. So we are OK in this case as well. + // "allfuck" + // | + // \ terminalDicNode (isLastWord=true, currentWordExactMatch=false) + if (isLastWord && currentWordExactMatch) { + return false; + } else { + return isBlockedOffensiveWord; + } +} + /* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode( const Scoring *const scoringPolicy, DicTraverseSession *traverseSession, const DicNode *const terminalDicNode, const float weightOfLangModelVsSpatialModel, @@ -98,24 +144,16 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; const bool isExactMatchWithIntentionalOmission = ErrorTypeUtils::isExactMatchWithIntentionalOmission( terminalDicNode->getContainedErrorTypes()); - const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); - // Heuristic: We exclude probability=0 first-char-uppercase words from exact match. - // (e.g. "AMD" and "and") - const bool isSafeExactMatch = isExactMatch - && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase); const int outputTypeFlags = (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) - | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) + | ((isExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) | (isExactMatchWithIntentionalOmission ? Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); - // Entries that are blacklisted or do not represent a word should not be output. const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()); - // When we have to block offensive words, non-exact matched offensive words should not be - // output. - const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); - const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive() - && !isSafeExactMatch; + + const bool shouldBlockThisWord = shouldBlockWord(traverseSession->getSuggestOptions(), + terminalDicNode, wordAttributes, true /* isLastWord */); // Increase output score of top typing suggestion to ensure autocorrection. // TODO: Better integration with java side autocorrection logic. @@ -123,11 +161,11 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; compoundDistance, traverseSession->getInputSize(), terminalDicNode->getContainedErrorTypes(), (forceCommitMultiWords && terminalDicNode->hasMultipleWords()), - boostExactMatches); + boostExactMatches, wordAttributes.getProbability() == 0); // Don't output invalid or blocked offensive words. However, we still need to submit their // shortcuts if any. - if (isValidWord && !isBlockedOffensiveWord) { + if (isValidWord && !shouldBlockThisWord) { int codePoints[MAX_WORD_LENGTH]; terminalDicNode->outputResult(codePoints); const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ? diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.h b/native/jni/src/suggest/core/result/suggestions_output_utils.h index bf8497828..eca1f78b2 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.h +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.h @@ -18,6 +18,7 @@ #define LATINIME_SUGGESTIONS_OUTPUT_UTILS #include "defines.h" +#include "suggest/core/dictionary/word_attributes.h" namespace latinime { @@ -25,11 +26,19 @@ class BinaryDictionaryShortcutIterator; class DicNode; class DicTraverseSession; class Scoring; +class SuggestOptions; class SuggestionResults; class SuggestionsOutputUtils { public: /** + * Returns true if we should block the incoming word, in the context of the user's + * preferences to include or not include possibly offensive words + */ + static bool shouldBlockWord(const SuggestOptions *const suggestOptions, + const DicNode *const terminalDicNode, const WordAttributes wordAttributes, + const bool isLastWord); + /** * Outputs the final list of suggestions (i.e., terminal nodes). */ static void outputSuggestions(const Scoring *const scoringPolicy, diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 68a36454e..e5e9b46bf 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -29,6 +29,7 @@ #include "suggest/core/result/suggestions_output_utils.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest_options.h" +#include "utils/profiler.h" namespace latinime { @@ -48,8 +49,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const { - PROF_OPEN; - PROF_START(0); + PROF_INIT; + PROF_TIMER_START(0); const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, @@ -57,8 +58,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, // TODO: Add the way to evaluate cache initializeSearch(tSession); - PROF_END(0); - PROF_START(1); + PROF_TIMER_END(0); + PROF_TIMER_START(1); // keep expanding search dicNodes until all have terminated. while (tSession->getDicTraverseCache()->activeSize() > 0) { @@ -66,12 +67,11 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, tSession->getDicTraverseCache()->advanceActiveDicNodes(); tSession->getDicTraverseCache()->advanceInputIndex(inputSize); } - PROF_END(1); - PROF_START(2); + PROF_TIMER_END(1); + PROF_TIMER_START(2); SuggestionsOutputUtils::outputSuggestions( SCORING, tSession, weightOfLangModelVsSpatialModel, outSuggestionResults); - PROF_END(2); - PROF_CLOSE; + PROF_TIMER_END(2); } /** @@ -416,6 +416,11 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode traverseSession->getDictionaryStructurePolicy()->getWordAttributesInContext( dicNode->getPrevWordIds(), dicNode->getWordId(), traverseSession->getMultiBigramMap()); + if (SuggestionsOutputUtils::shouldBlockWord(traverseSession->getSuggestOptions(), + dicNode, wordAttributes, false /* isLastWord */)) { + return; + } + if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode, wordAttributes.getProbability())) { return; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 44c2f443f..7a5acd7d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -134,15 +134,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { // same so we use them for both here. switch (mDictFormatVersion) { case FormatUtils::VERSION_2: - return FormatUtils::VERSION_2; case FormatUtils::VERSION_201: - return FormatUtils::VERSION_201; + AKLOGE("Dictionary versions 2 and 201 are incompatible with this version"); + return FormatUtils::UNKNOWN_VERSION; + case FormatUtils::VERSION_202: + return FormatUtils::VERSION_202; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: return FormatUtils::VERSION_4_ONLY_FOR_TESTING; - case FormatUtils::VERSION_4: - return FormatUtils::VERSION_4; - case FormatUtils::VERSION_4_DEV: - return FormatUtils::VERSION_4_DEV; + case FormatUtils::VERSION_402: + return FormatUtils::VERSION_402; + case FormatUtils::VERSION_403: + return FormatUtils::VERSION_403; default: return FormatUtils::UNKNOWN_VERSION; } @@ -245,7 +247,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { } bool supportsBeginningOfSentence() const { - return mDictFormatVersion >= FormatUtils::VERSION_4; + return mDictFormatVersion >= FormatUtils::VERSION_402; } const int *getCodePointTable() const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 41a8b13b8..19ed0d468 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -111,11 +111,12 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; switch (version) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: - // Version 2 or 201 dictionary writing is not supported. + case FormatUtils::VERSION_202: + // None of the static dictionaries (v2x) support writing return false; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: - case FormatUtils::VERSION_4: - case FormatUtils::VERSION_4_DEV: + case FormatUtils::VERSION_402: + case FormatUtils::VERSION_403: return buffer->writeUintAndAdvancePosition(version /* data */, HEADER_DICTIONARY_VERSION_SIZE, writingPos); default: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp index 9e1adff70..15ac88319 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp @@ -65,6 +65,8 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId; if (mHasHistoricalInfo) { + // Hack for better migration. + count += level; const HistoricalInfo historicalInfo(timestamp, level, count); return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId); } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp index ef6166ffd..61ef4aa42 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp @@ -50,7 +50,8 @@ const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int ter Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos); const int count = buffer->readUintAndAdvancePosition( Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos); - const HistoricalInfo historicalInfo(timestamp, level, count); + // Hack for better migration. + const HistoricalInfo historicalInfo(timestamp, level, count + level); return ProbabilityEntry(flags, probability, &historicalInfo); } else { return ProbabilityEntry(flags, probability); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 08e39ce43..ca7d93b0e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -140,7 +140,7 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const { - return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), + return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0); } @@ -164,7 +164,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI } const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); - if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { + if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } if (prevWordIds.empty()) { @@ -614,7 +614,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 372c9e36f..9a9a21b6b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -58,7 +58,7 @@ namespace latinime { const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion); switch (dictFormatVersion) { - case FormatUtils::VERSION_4: { + case FormatUtils::VERSION_402: { return newPolicyForOnMemoryV4Dict<backward::v402::Ver4DictConstants, backward::v402::Ver4DictBuffers, backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr, @@ -66,7 +66,7 @@ namespace latinime { dictFormatVersion, locale, attributeMap); } case FormatUtils::VERSION_4_ONLY_FOR_TESTING: - case FormatUtils::VERSION_4_DEV: { + case FormatUtils::VERSION_403: { return newPolicyForOnMemoryV4Dict<Ver4DictConstants, Ver4DictBuffers, Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>( dictFormatVersion, locale, attributeMap); @@ -115,9 +115,10 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str switch (formatVersion) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: - AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path); + case FormatUtils::VERSION_202: + AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path); break; - case FormatUtils::VERSION_4: { + case FormatUtils::VERSION_402: { return newPolicyForV4Dict<backward::v402::Ver4DictConstants, backward::v402::Ver4DictBuffers, backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr, @@ -125,7 +126,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str headerFilePath, formatVersion, std::move(mmappedBuffer)); } case FormatUtils::VERSION_4_ONLY_FOR_TESTING: - case FormatUtils::VERSION_4_DEV: { + case FormatUtils::VERSION_403: { return newPolicyForV4Dict<Ver4DictConstants, Ver4DictBuffers, Ver4DictBuffers::Ver4DictBuffersPtr, Ver4PatriciaTriePolicy>( headerFilePath, formatVersion, std::move(mmappedBuffer)); @@ -177,11 +178,14 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: + AKLOGE("Dictionary versions 2 and 201 are incompatible with this version"); + break; + case FormatUtils::VERSION_202: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); case FormatUtils::VERSION_4_ONLY_FOR_TESTING: - case FormatUtils::VERSION_4: - case FormatUtils::VERSION_4_DEV: + case FormatUtils::VERSION_402: + case FormatUtils::VERSION_403: AKLOGE("Given path is a file but the format is version 4. path: %s", path); break; default: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 585e87a24..e52706e07 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -144,17 +144,6 @@ class PtNodeParams { return PatriciaTrieReadingUtils::isTerminal(mFlags); } - AK_FORCE_INLINE bool isBlacklisted() const { - // Note: this method will be removed in the next change. - // It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403. - // * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY - // when isBlacklisted (i.e. to only check if isNotAWord or isDeleted) - // * getWordAttributes will be changed to always return blacklisted=false and - // isPossiblyOffensive according to the function below (instead of the current - // behaviour of checking if the probability is zero) - return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags); - } - AK_FORCE_INLINE bool isPossiblyOffensive() const { return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 66fd18a52..1a51acad5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ - #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "defines.h" @@ -317,8 +316,8 @@ const WordAttributes PatriciaTriePolicy::getWordAttributesInContext( const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const { - return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), - ptNodeParams.getProbability() == 0); + return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(), + ptNodeParams.isPossiblyOffensive()); } int PatriciaTriePolicy::getProbability(const int unigramProbability, @@ -345,10 +344,9 @@ int PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); const PtNodeParams ptNodeParams = mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); - if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) { - // If this is not a word, or if it's a blacklisted entry, it should behave as - // having no probability outside of the suggestion process (where it should be used - // for shortcuts). + if (ptNodeParams.isNotAWord()) { + // If this is not a word, it should behave as having no probability outside of the + // suggestion process (where it should be used for shortcuts). return NOT_A_PROBABILITY; } if (!prevWordIds.empty()) { @@ -480,7 +478,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h index f4d340f86..9c4ab18e4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h @@ -105,7 +105,7 @@ class ProbabilityEntry { encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT)) | static_cast<uint8_t>(mHistoricalInfo.getLevel()); encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - | static_cast<uint8_t>(mHistoricalInfo.getCount()); + | static_cast<uint16_t>(mHistoricalInfo.getCount()); } else { encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT)) | static_cast<uint8_t>(mProbability); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index eb6080a24..bd89b8da7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -49,8 +49,8 @@ const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0; const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4; -const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1; -const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1; +const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 0; +const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 2; const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1; const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 600b5ffe4..13d7a5714 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -47,6 +47,7 @@ class Ver4DictConstants { static const int NOT_A_TERMINAL_ADDRESS; static const int TERMINAL_ID_FIELD_SIZE; static const int TIME_STAMP_FIELD_SIZE; + // TODO: Remove static const int WORD_LEVEL_FIELD_SIZE; static const int WORD_COUNT_FIELD_SIZE; // Flags in probability entry. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 96d789f58..7449cd02b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -146,8 +146,16 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI if (!probabilityEntry.isValid()) { continue; } - const int probability = probabilityEntry.hasHistoricalInfo() ? - 0 : probabilityEntry.getProbability(); + int probability = NOT_A_PROBABILITY; + if (probabilityEntry.hasHistoricalInfo()) { + // TODO: Quit checking count here. + // If count <= 1, the word can be an invaild word. The actual probability should + // be checked using getWordAttributesInContext() in onVisitEntry(). + probability = probabilityEntry.getHistoricalInfo()->getCount() <= 1 ? + NOT_A_PROBABILITY : 0; + } else { + probability = probabilityEntry.getProbability(); + } listener->onVisitEntry(probability, entry.getWordId()); } } @@ -552,7 +560,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(), wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(), *historicalInfo, std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 9d8e86675..edcb43678 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -44,13 +44,13 @@ const int DictFileWritingUtils::SIZE_OF_BUFFER_SIZE_FIELD = 4; TimeKeeper::setCurrentTime(); const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion); switch (formatVersion) { - case FormatUtils::VERSION_4: + case FormatUtils::VERSION_402: return createEmptyV4DictFile<backward::v402::Ver4DictConstants, backward::v402::Ver4DictBuffers, backward::v402::Ver4DictBuffers::Ver4DictBuffersPtr>( filePath, localeAsCodePointVector, attributeMap, formatVersion); case FormatUtils::VERSION_4_ONLY_FOR_TESTING: - case FormatUtils::VERSION_4_DEV: + case FormatUtils::VERSION_403: return createEmptyV4DictFile<Ver4DictConstants, Ver4DictBuffers, Ver4DictBuffers::Ver4DictBuffersPtr>( filePath, localeAsCodePointVector, attributeMap, formatVersion); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index 0cffe569d..e225c235e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -28,15 +28,17 @@ const size_t FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; /* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) { switch (formatVersion) { case VERSION_2: - return VERSION_2; case VERSION_201: - return VERSION_201; + AKLOGE("Dictionary versions 2 and 201 are incompatible with this version"); + return UNKNOWN_VERSION; + case VERSION_202: + return VERSION_202; case VERSION_4_ONLY_FOR_TESTING: return VERSION_4_ONLY_FOR_TESTING; - case VERSION_4: - return VERSION_4; - case VERSION_4_DEV: - return VERSION_4_DEV; + case VERSION_402: + return VERSION_402; + case VERSION_403: + return VERSION_403; default: return UNKNOWN_VERSION; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 96310086b..1616efcce 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -31,11 +31,15 @@ class FormatUtils { public: enum FORMAT_VERSION { // These MUST have the same values as the relevant constants in FormatSpec.java. + // TODO: Remove VERSION_2 and VERSION_201 when we: + // * Confirm that old versions of LatinIME download old-format dictionaries + // * We no longer need the corresponding constants on the Java side for dicttool VERSION_2 = 2, VERSION_201 = 201, + VERSION_202 = 202, VERSION_4_ONLY_FOR_TESTING = 399, - VERSION_4 = 402, - VERSION_4_DEV = 403, + VERSION_402 = 402, + VERSION_403 = 403, UNKNOWN_VERSION = -1 }; diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp index a6f9a8b23..856808a74 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp @@ -24,6 +24,7 @@ const int ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED = 120; const float ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD = 1.0f; const float ScoringParams::EXACT_MATCH_PROMOTION = 1.1f; +const float ScoringParams::PERFECT_MATCH_PROMOTION = 1.1f; const float ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH = 0.01f; const float ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH = 0.02f; const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f; diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.h b/native/jni/src/suggest/policyimpl/typing/scoring_params.h index b8f889559..6f327a370 100644 --- a/native/jni/src/suggest/policyimpl/typing/scoring_params.h +++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.h @@ -34,6 +34,7 @@ class ScoringParams { static const int THRESHOLD_SHORT_WORD_LENGTH; static const float EXACT_MATCH_PROMOTION; + static const float PERFECT_MATCH_PROMOTION; static const float CASE_ERROR_PENALTY_FOR_EXACT_MATCH; static const float ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; static const float DIGRAPH_PENALTY_FOR_EXACT_MATCH; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h index 0240bcf54..6acd767ea 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h @@ -44,23 +44,50 @@ class TypingScoring : public Scoring { AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const int inputSize, const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit, - const bool boostExactMatches) const { + const bool boostExactMatches, const bool hasProbabilityZero) const { const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT; float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance; if (forceCommit) { score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD; } - if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) { - score += ScoringParams::EXACT_MATCH_PROMOTION; - if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) { - score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH; + if (hasProbabilityZero) { + // Previously, when both legitimate 0-frequency words (such as distracters) and + // offensive words were encoded in the same way, distracters would never show up + // when the user blocked offensive words (the default setting, as well as the + // setting for regression tests). + // + // When b/11031090 was fixed and a separate encoding was used for offensive words, + // 0-frequency words would no longer be blocked when they were an "exact match" + // (where case mismatches and accent mismatches would be considered an "exact + // match"). The exact match boosting functionality meant that, for example, when + // the user typed "mt" they would be suggested the word "Mt", although they most + // probably meant to type "my". + // + // For this reason, we introduced this change, which does the following: + // * Defines the "perfect match" as a really exact match, with no room for case or + // accent mismatches + // * When the target word has probability zero (as "Mt" does, because it is a + // distracter), ONLY boost its score if it is a perfect match. + // + // By doing this, when the user types "mt", the word "Mt" will NOT be boosted, and + // they will get "my". However, if the user makes an explicit effort to type "Mt", + // we do boost the word "Mt" so that the user's input is not autocorrected to "My". + if (boostExactMatches && ErrorTypeUtils::isPerfectMatch(containedErrorTypes)) { + score += ScoringParams::PERFECT_MATCH_PROMOTION; } - if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) { - score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; - } - if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) { - score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH; + } else { + if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) { + score += ScoringParams::EXACT_MATCH_PROMOTION; + if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) { + score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH; + } + if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) { + score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; + } + if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) { + score -= ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH; + } } } return static_cast<int>(score * SUGGEST_INTERFACE_OUTPUT_SCALE); diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index 408373176..e0f671056 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -133,6 +133,29 @@ class IntArrayView { return std::vector<int>(begin(), end()); } + std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const { + if (limit <= 0) { + return std::vector<IntArrayView>(); + } + std::vector<IntArrayView> result; + if (limit == 1) { + result.emplace_back(mPtr, mSize); + return result; + } + size_t startIndex = 0; + for (size_t i = 0; i < mSize; ++i) { + if (mPtr[i] == separator) { + result.emplace_back(mPtr + startIndex, i - startIndex); + startIndex = i + 1; + if (result.size() >= static_cast<size_t>(limit - 1)) { + break; + } + } + } + result.emplace_back(mPtr + startIndex, mSize - startIndex); + return result; + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h new file mode 100644 index 000000000..5f107fed3 --- /dev/null +++ b/native/jni/src/utils/profiler.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PROFILER_H +#define LATINIME_PROFILER_H + +#ifdef FLAG_DO_PROFILE + +#include "defines.h" + +#include <ctime> +#include <unordered_map> + +namespace latinime { + +class Profiler final { + public: + Profiler(const clockid_t clockId) + : mClockId(clockId), mStartTime(getTimeInMicroSec()), mStartTimes(), mTimes(), + mCounters() {} + + ~Profiler() { + const float totalTime = + static_cast<float>(getTimeInMicroSec() - mStartTime) / 1000.f; + AKLOGI("Total time is %6.3f ms.", totalTime); + for (const auto &time : mTimes) { + AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", time.first, + time.second / totalTime * 100.0f, time.second, mCounters[time.first]); + } + } + + void startTimer(const int id) { + mStartTimes[id] = getTimeInMicroSec(); + } + + void endTimer(const int id) { + mTimes[id] += static_cast<float>(getTimeInMicroSec() - mStartTimes[id]) / 1000.0f; + mCounters[id]++; + } + + operator bool() const { return false; } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler); + + const clockid_t mClockId; + int64_t mStartTime; + std::unordered_map<int, int64_t> mStartTimes; + std::unordered_map<int, float> mTimes; + std::unordered_map<int, int> mCounters; + + int64_t getTimeInMicroSec() { + timespec time; + clock_gettime(mClockId, &time); + return static_cast<int64_t>(time.tv_sec) * 1000000 + + static_cast<int64_t>(time.tv_nsec) / 1000; + } +}; +} // namespace latinime + +#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID) +#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id) +#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id) + +#else // FLAG_DO_PROFILE + +#define PROF_INIT +#define PROF_TIMER_START(timer_id) +#define PROF_TIMER_END(timer_id) + +#endif // FLAG_DO_PROFILE + +#endif /* LATINIME_PROFILER_H */ diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp index 86040f12c..313a9af10 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp @@ -52,16 +52,14 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) { const int flag = 0xF0; const int timestamp = 0x3FFFFFFF; - const int level = 3; const int count = 10; const int wordId = 100; - const HistoricalInfo historicalInfo(timestamp, level, count); + const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count); const ProbabilityEntry probabilityEntry(flag, &historicalInfo); languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId); EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimestamp()); - EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel()); EXPECT_EQ(count, entry.getHistoricalInfo()->getCount()); // Remove diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp index 260b347ce..eb78034ba 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp @@ -39,20 +39,18 @@ TEST(ProbabilityEntryTest, TestEncodeDecode) { TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) { const int flag = 0xF0; const int timestamp = 0x3FFFFFFF; - const int level = 3; - const int count = 10; + const int count = 0xABCD; - const HistoricalInfo historicalInfo(timestamp, level, count); + const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count); const ProbabilityEntry entry(flag, &historicalInfo); const uint64_t encodedEntry = entry.encode(true /* hasHistoricalInfo */); - EXPECT_EQ(0xF03FFFFFFF030Aull, encodedEntry); + EXPECT_EQ(0xF03FFFFFFFABCDull, encodedEntry); const ProbabilityEntry decodedEntry = ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */); EXPECT_EQ(flag, decodedEntry.getFlags()); EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimestamp()); - EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel()); EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount()); } diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp index 15f560cd1..494200568 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp @@ -62,14 +62,14 @@ TEST(FormatUtilsTest, TestDetectFormatVersion) { } { const std::vector<uint8_t> buffer = - getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4, 0, 0); - EXPECT_EQ(FormatUtils::VERSION_4, FormatUtils::detectFormatVersion( + getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_402, 0, 0); + EXPECT_EQ(FormatUtils::VERSION_402, FormatUtils::detectFormatVersion( ReadOnlyByteArrayView(buffer.data(), buffer.size()))); } { const std::vector<uint8_t> buffer = - getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4_DEV, 0, 0); - EXPECT_EQ(FormatUtils::VERSION_4_DEV, FormatUtils::detectFormatVersion( + getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_403, 0, 0); + EXPECT_EQ(FormatUtils::VERSION_403, FormatUtils::detectFormatVersion( ReadOnlyByteArrayView(buffer.data(), buffer.size()))); } diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp index 4757a416b..2fce633f5 100644 --- a/native/jni/tests/utils/int_array_view_test.cpp +++ b/native/jni/tests/utils/int_array_view_test.cpp @@ -151,5 +151,52 @@ TEST(IntArrayViewTest, TestToVector) { EXPECT_EQ(std::vector<int>(), CodePointArrayView().toVector()); } +TEST(IntArrayViewTest, TestSplit) { + EXPECT_TRUE(IntArrayView().split(0, 0).empty()); + { + const auto intArrayViews = IntArrayView().split(0, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + { + const auto intArrayViews = IntArrayView().split(0, 100); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + + const std::vector<int> intVector = {1, 2, 3, 3, 2, 3}; + const IntArrayView intArrayView(intVector); + { + const auto intArrayViews = intArrayView.split(2); + EXPECT_EQ(3u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3}), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({3}), intArrayViews[2].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 2); + EXPECT_EQ(2u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3, 2, 3}), intArrayViews[1].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_EQ(intVector, intArrayViews[0].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 0); + EXPECT_EQ(0u, intArrayViews.size()); + } + { + const auto intArrayViews = intArrayView.split(3); + EXPECT_EQ(4u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1, 2}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({2}), intArrayViews[2].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[3].toVector()); + } +} + } // namespace } // namespace latinime |