diff options
Diffstat (limited to 'native')
36 files changed, 1481 insertions, 95 deletions
diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk index b6be9c541..1c004f73a 100644 --- a/native/dicttoolkit/NativeFileList.mk +++ b/native/dicttoolkit/NativeFileList.mk @@ -15,7 +15,23 @@ LATIN_IME_DICT_TOOLKIT_MAIN_SRC_FILES := \ dict_toolkit_main.cpp -LATIN_IME_DICT_TOOLKIT_SRC_FILES := +LATIN_IME_DICT_TOOLKIT_SRC_FILES := \ + $(addprefix command_executors/, \ + diff_executor.cpp \ + header_executor.cpp \ + help_executor.cpp \ + info_executor.cpp \ + makedict_executor.cpp) \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict.cpp) \ + $(addprefix utils/, \ + command_utils.cpp \ + utf8_utils.cpp) LATIN_IME_DICT_TOOLKIT_TEST_FILES := \ - dict_toolkit_defines_test.cpp + dict_toolkit_defines_test.cpp \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict_test.cpp) \ + $(addprefix utils/, \ + command_utils_test.cpp \ + utf8_utils_test.cpp) diff --git a/native/dicttoolkit/UnitTests.mk b/native/dicttoolkit/UnitTests.mk index d568db44a..96e28730e 100644 --- a/native/dicttoolkit/UnitTests.mk +++ b/native/dicttoolkit/UnitTests.mk @@ -46,6 +46,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) include $(CLEAR_VARS) # TODO: Remove -std=c++11 once it is set by default on host build. +LOCAL_ADDRESS_SANITIZER := true LOCAL_CFLAGS += -std=c++11 -Wno-unused-parameter -Wno-unused-function LOCAL_CLANG := true LOCAL_CXX_STL := libc++ diff --git a/native/dicttoolkit/dict_toolkit_main.cpp b/native/dicttoolkit/dict_toolkit_main.cpp index d71b50eb4..53cc5e915 100644 --- a/native/dicttoolkit/dict_toolkit_main.cpp +++ b/native/dicttoolkit/dict_toolkit_main.cpp @@ -16,8 +16,24 @@ #include <cstdio> +#include "dict_toolkit_defines.h" +#include "utils/command_utils.h" + +void usage(int argc, char **argv) { + fprintf(stderr, "Usage: %s <command> [arguments]\n", argc > 0 ? argv[0] : "dicttoolkit"); +} + int main(int argc, char **argv) { - // TODO: Implement. - printf("%s\n", argv[0]); - return 0; + if (argc < MIN_ARG_COUNT) { + usage(argc, argv); + return 1; + } + using namespace latinime::dicttoolkit; + const CommandType commandType = CommandUtils::getCommandType(argv[1]); + if (commandType == CommandType::Unknown) { + CommandUtils::printCommandUnknownMessage(argv[0], argv[1]); + return 1; + } + const auto executor = CommandUtils::getCommandExecutor(commandType); + return executor(argc - 1, argv + 1); } diff --git a/native/dicttoolkit/src/command_executors/diff_executor.cpp b/native/dicttoolkit/src/command_executors/diff_executor.cpp new file mode 100644 index 000000000..077a40090 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/diff_executor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/diff_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const DiffExecutor::COMMAND_NAME = "diff"; + +/* static */ int DiffExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void DiffExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Shows differences between two dictionaries.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/diff_executor.h b/native/dicttoolkit/src/command_executors/diff_executor.h new file mode 100644 index 000000000..fc8dc0d8f --- /dev/null +++ b/native/dicttoolkit/src/command_executors/diff_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class DiffExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DiffExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_DIFF_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/header_executor.cpp b/native/dicttoolkit/src/command_executors/header_executor.cpp new file mode 100644 index 000000000..068a62c31 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/header_executor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/header_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const HeaderExecutor::COMMAND_NAME = "header"; + +/* static */ int HeaderExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void HeaderExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Prints the header contents of a dictionary file.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/header_executor.h b/native/dicttoolkit/src/command_executors/header_executor.h new file mode 100644 index 000000000..4cdeb1a99 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/header_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class HeaderExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_HEADER_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/help_executor.cpp b/native/dicttoolkit/src/command_executors/help_executor.cpp new file mode 100644 index 000000000..bd29a5b16 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/help_executor.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/help_executor.h" + +#include <cstdio> +#include <functional> +#include <vector> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" +#include "utils/command_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const char *const HelpExecutor::COMMAND_NAME = "help"; + +/* static */ int HelpExecutor::run(const int argc, char **argv) { + printf("Available commands:\n\n"); + const std::vector<std::function<void(void)>> printUsageMethods = {DiffExecutor::printUsage, + HeaderExecutor::printUsage, InfoExecutor::printUsage, MakedictExecutor::printUsage, + printUsage}; + for (const auto &printUsageMethod : printUsageMethods) { + printUsageMethod(); + } + return 0; +} + +/* static */ void HelpExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Show this help list.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/help_executor.h b/native/dicttoolkit/src/command_executors/help_executor.h new file mode 100644 index 000000000..280610eb9 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/help_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class HelpExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(HelpExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_HELP_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/info_executor.cpp b/native/dicttoolkit/src/command_executors/info_executor.cpp new file mode 100644 index 000000000..c4d84cab3 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/info_executor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/info_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const InfoExecutor::COMMAND_NAME = "info"; + +/* static */ int InfoExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void InfoExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Prints various information about a dictionary file.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/info_executor.h b/native/dicttoolkit/src/command_executors/info_executor.h new file mode 100644 index 000000000..4ffa74fb0 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/info_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class InfoExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(InfoExecutor); +}; + +} // namepsace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_INFO_EXECUTOR_H diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.cpp b/native/dicttoolkit/src/command_executors/makedict_executor.cpp new file mode 100644 index 000000000..ea62e3c37 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/makedict_executor.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "command_executors/makedict_executor.h" + +#include <cstdio> + +namespace latinime { +namespace dicttoolkit { + +const char *const MakedictExecutor::COMMAND_NAME = "makedict"; + +/* static */ int MakedictExecutor::run(const int argc, char **argv) { + fprintf(stderr, "Command '%s' has not been implemented yet.\n", COMMAND_NAME); + return 0; +} + +/* static */ void MakedictExecutor::printUsage() { + printf("*** %s\n", COMMAND_NAME); + printf("Usage: %s\n", COMMAND_NAME); + printf("Converts a source dictionary file to one or several outputs.\n" + "Source can be a binary dictionary file or a combined format file.\n" + "Binary version 2 (Jelly Bean), 4, and combined format outputs are supported.\n\n"); +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/command_executors/makedict_executor.h b/native/dicttoolkit/src/command_executors/makedict_executor.h new file mode 100644 index 000000000..ae1309f60 --- /dev/null +++ b/native/dicttoolkit/src/command_executors/makedict_executor.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H +#define LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class MakedictExecutor final { + public: + static const char *const COMMAND_NAME; + + static int run(const int argc, char **argv); + static void printUsage(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(MakedictExecutor); +}; + +} // namespace dicttoolkit +} // namepsace latinime +#endif // LATINIME_DICT_TOOLKIT_MAKEDICT_EXECUTOR_H diff --git a/native/dicttoolkit/src/dict_toolkit_defines.h b/native/dicttoolkit/src/dict_toolkit_defines.h index 2a2104e26..dbaae0ca0 100644 --- a/native/dicttoolkit/src/dict_toolkit_defines.h +++ b/native/dicttoolkit/src/dict_toolkit_defines.h @@ -19,4 +19,6 @@ #include "defines.h" +#define MIN_ARG_COUNT 2 + #endif // LATINIME_DICT_TOOLKIT_DEFINES_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp new file mode 100644 index 000000000..af28131cf --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h" + +namespace latinime { +namespace dicttoolkit { + +bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) { + const CodePointArrayView codePoints = wordProperty.getCodePoints(); + if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) { + return false; + } + return addWordInner(codePoints, wordProperty, mRootPtNodeArray); +} + +bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints, + const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { + auto ptNodeList = ptNodeArray.getMutablePtNodeList(); + auto ptNodeIt = ptNodeList->begin(); + for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { + const auto &ptNode = *ptNodeIt; + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[0] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[0] > ptNodeCodePoints[0]) { + break; + } + size_t i = 1; + for (; i < codePoints.size(); ++i) { + if (i >= ptNodeCodePoints.size()) { + // Add new child. + return addWordInner(codePoints.skip(i), wordProperty, + ptNode->getChildrenPtNodeArray()); + } + if (codePoints[i] != ptNodeCodePoints[i]) { + break; + } + } + if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) { + // All code points matched. + if (ptNode->getWordProperty()) { + // Adding the same word multiple times is not supported. + return false; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode)); + ptNodeList->erase(ptNodeIt); + return true; + } + // The (i+1)-th elements are different. + // Create and Add new parent ptNode for the common part. + auto newPtNode = codePoints.size() == i + ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty) + : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i)); + ptNodeList->insert(ptNodeIt, newPtNode); + OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray = + newPtNode->getChildrenPtNodeArray(); + // Add new child for the existing ptNode. + childrenPtNodeArray.getMutablePtNodeList()->push_back( + std::make_shared<OffdeviceIntermediateDictPtNode>( + ptNodeCodePoints.skip(i), *ptNode)); + ptNodeList->erase(ptNodeIt); + if (codePoints.size() != i) { + // Add a child for the new word. + return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray); + } + return true; + } + ptNodeList->insert(ptNodeIt, + std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)); + return true; +} + +const WordProperty *OffdeviceIntermediateDict::getWordProperty( + const CodePointArrayView codePoints) const { + const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray; + for (size_t i = 0; i < codePoints.size();) { + bool foundNext = false; + for (const auto ptNode : ptNodeArray->getPtNodeList()) { + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[i] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[i] > ptNodeCodePoints[0] + || codePoints.size() < ptNodeCodePoints.size()) { + return nullptr; + } + for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) { + if (codePoints[i + j] != ptNodeCodePoints[j]) { + return nullptr; + } + } + i += ptNodeCodePoints.size(); + if (i == codePoints.size()) { + return ptNode->getWordProperty(); + } + ptNodeArray = &ptNode->getChildrenPtNodeArray(); + foundNext = true; + break; + } + if (!foundNext) { + break; + } + } + return nullptr; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h new file mode 100644 index 000000000..13d26ba91 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_header.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +/** + * On memory patricia trie to represent a dictionary. + */ +class OffdeviceIntermediateDict final { + public: + OffdeviceIntermediateDict(const OffdeviceIntermediateDictHeader &header) + : mHeader(header), mRootPtNodeArray() {} + + bool addWord(const WordProperty &wordProperty); + // The returned value will be invalid after modifying the dictionary. e.g. calling addWord(). + const WordProperty *getWordProperty(const CodePointArrayView codePoints) const; + const OffdeviceIntermediateDictHeader &getHeader() const { return mHeader; } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict); + + const OffdeviceIntermediateDictHeader mHeader; + OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray; + + bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty, + OffdeviceIntermediateDictPtNodeArray &ptNodeArray); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h new file mode 100644 index 000000000..440627a79 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_header.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H + +#include <map> +#include <vector> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictHeader final { + public: + using AttributeMap = std::map<std::vector<int>, std::vector<int>>; + + OffdeviceIntermediateDictHeader(const AttributeMap &attributesMap) + : mAttributeMap(attributesMap) {} + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(OffdeviceIntermediateDictHeader); + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictHeader); + + const AttributeMap mAttributeMap; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_HEADER_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h new file mode 100644 index 000000000..721ccd778 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H + +#include <memory> + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode final { + public: + // Non-terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(nullptr) {} + + // Terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const WordProperty &wordProperty) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(new WordProperty(wordProperty)) {} + + // Replacing PtNodeCodePoints. + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(*ptNode.mWortProperty)) {} + + // Replacing WordProperty. + OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNode.mPtNodeCodePoints), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(wordProperty)) {} + + const WordProperty *getWordProperty() const { + return mWortProperty.get(); + } + + const CodePointArrayView getPtNodeCodePoints() const { + return CodePointArrayView(mPtNodeCodePoints); + } + + OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() { + return mChildrenPtNodeArray; + } + + private: + DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode); + + const std::vector<int> mPtNodeCodePoints; + OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray; + const std::unique_ptr<WordProperty> mWortProperty; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h new file mode 100644 index 000000000..f87456ce0 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H + +#include <list> +#include <memory> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode; + +class OffdeviceIntermediateDictPtNodeArray final { + public: + const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const { + return mPtNodes; + } + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() { + return &mPtNodes; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray); + + std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H diff --git a/native/dicttoolkit/src/utils/command_utils.cpp b/native/dicttoolkit/src/utils/command_utils.cpp new file mode 100644 index 000000000..34196425e --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <cstdio> + +#include "command_executors/diff_executor.h" +#include "command_executors/header_executor.h" +#include "command_executors/help_executor.h" +#include "command_executors/info_executor.h" +#include "command_executors/makedict_executor.h" + +namespace latinime { +namespace dicttoolkit { + +/* static */ CommandType CommandUtils::getCommandType(const std::string &commandName) { + if (commandName == InfoExecutor::COMMAND_NAME) { + return CommandType::Info; + } else if (commandName == DiffExecutor::COMMAND_NAME) { + return CommandType::Diff; + } else if (commandName == MakedictExecutor::COMMAND_NAME) { + return CommandType::Makedict; + } else if (commandName == HeaderExecutor::COMMAND_NAME) { + return CommandType::Header; + } else if (commandName == HelpExecutor::COMMAND_NAME) { + return CommandType::Help; + } else { + return CommandType::Unknown; + } +} + +/* static */ void CommandUtils::printCommandUnknownMessage(const std::string &programName, + const std::string &commandName) { + fprintf(stderr, "Command '%s' is unknown. Try '%s %s' for more information.\n", + commandName.c_str(), programName.c_str(), HelpExecutor::COMMAND_NAME); +} + +/* static */ std::function<int(int, char **)> CommandUtils::getCommandExecutor( + const CommandType commandType) { + switch (commandType) { + case CommandType::Info: + return InfoExecutor::run; + case CommandType::Diff: + return DiffExecutor::run; + case CommandType::Makedict: + return MakedictExecutor::run; + case CommandType::Header: + return HeaderExecutor::run; + case CommandType::Help: + return HelpExecutor::run; + default: + return [] (int, char **) -> int { + printf("Command executor not found."); + return 1; + }; + } +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/command_utils.h b/native/dicttoolkit/src/utils/command_utils.h new file mode 100644 index 000000000..4a181f194 --- /dev/null +++ b/native/dicttoolkit/src/utils/command_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H +#define LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H + +#include <functional> +#include <memory> +#include <string> + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +enum class CommandType : int { + Info, + Diff, + Makedict, + Header, + Help, + Unknown +}; + +class CommandUtils { +public: + static CommandType getCommandType(const std::string &commandName); + static void printCommandUnknownMessage(const std::string &programName, + const std::string &commandName); + static std::function<int(int, char **)> getCommandExecutor(const CommandType commandType); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(CommandUtils); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_COMMAND_UTILS_H diff --git a/native/dicttoolkit/src/utils/utf8_utils.cpp b/native/dicttoolkit/src/utils/utf8_utils.cpp new file mode 100644 index 000000000..0f349f512 --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include "utils/char_utils.h" + +namespace latinime { +namespace dicttoolkit { + +const size_t Utf8Utils::MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT = 4; +const uint8_t Utf8Utils::FIRST_BYTE_MARKER_MASKS[] = {0, 0x80, 0xE0, 0xF0, 0xF8}; +const uint8_t Utf8Utils::FIRST_BYTE_MARKERS[] = {0, 0x00, 0xC0, 0xE0, 0xF0}; +const uint8_t Utf8Utils::FIRST_BYTE_CODE_POINT_BITS_MASKS[] = {0, 0x7F, 0x1F, 0x0F, 0x03}; +const int Utf8Utils::MAX_ENCODED_CODE_POINT_VALUES[] = {-1, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; + +const uint8_t Utf8Utils::TRAILING_BYTE_CODE_POINT_BITS_MASK = 0x3F; +const uint8_t Utf8Utils::TRAILING_BYTE_MARKER = 0x80; +const size_t Utf8Utils::CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE = 6; + +/* static */ std::vector<int> Utf8Utils::getCodePoints(const std::string &utf8Str) { + std::vector<int> codePoints; + int remainingByteCountForCurrentCodePoint = 0; + int currentCodePointSequenceSize = 0; + int codePoint = 0; + for (const char c : utf8Str) { + if (remainingByteCountForCurrentCodePoint == 0) { + currentCodePointSequenceSize = getSequenceSizeByCheckingFirstByte(c); + if (currentCodePointSequenceSize <= 0) { + AKLOGE("%x is an invalid utf8 first byte value.", c); + return std::vector<int>(); + } + remainingByteCountForCurrentCodePoint = currentCodePointSequenceSize; + codePoint = maskFirstByte(c, remainingByteCountForCurrentCodePoint); + } else { + codePoint <<= CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + codePoint += maskTrailingByte(c); + } + remainingByteCountForCurrentCodePoint--; + if (remainingByteCountForCurrentCodePoint == 0) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[currentCodePointSequenceSize - 1]) { + AKLOGE("%d bytes encode for codePoint(%x) is a redundant UTF-8 sequence.", + currentCodePointSequenceSize, codePoint); + return std::vector<int>(); + } + codePoints.push_back(codePoint); + } + } + return codePoints; +} + +/* static */ int Utf8Utils::getSequenceSizeByCheckingFirstByte(const uint8_t firstByte) { + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if ((firstByte & FIRST_BYTE_MARKER_MASKS[i]) == FIRST_BYTE_MARKERS[i]) { + return i; + } + } + // Not a valid utf8 char first byte. + return -1; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskFirstByte(const uint8_t firstByte, + const int sequenceSize) { + return firstByte & FIRST_BYTE_CODE_POINT_BITS_MASKS[sequenceSize]; +} + +/* static */ AK_FORCE_INLINE int Utf8Utils::maskTrailingByte(const uint8_t secondOrLaterByte) { + return secondOrLaterByte & TRAILING_BYTE_CODE_POINT_BITS_MASK; +} + +/* static */ std::string Utf8Utils::getUtf8String(const CodePointArrayView codePoints) { + std::string utf8String; + for (const int codePoint : codePoints) { + const int sequenceSize = getSequenceSizeToEncodeCodePoint(codePoint); + if (sequenceSize <= 0) { + AKLOGE("Cannot encode code point (%d).", codePoint); + return std::string(); + } + const int trailingByteCount = sequenceSize - 1; + // Output first byte. + const int value = codePoint >> (trailingByteCount * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE); + utf8String.push_back(static_cast<char>(value | FIRST_BYTE_MARKERS[sequenceSize])); + // Output second and later bytes. + for (int i = 1; i < sequenceSize; ++i) { + const int shiftAmount = (trailingByteCount - i) * CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + const int value = (codePoint >> shiftAmount) & TRAILING_BYTE_CODE_POINT_BITS_MASK; + utf8String.push_back(static_cast<char>(value | TRAILING_BYTE_MARKER)); + } + } + return utf8String; +} + +/* static */ int Utf8Utils::getSequenceSizeToEncodeCodePoint(const int codePoint) { + if (codePoint < 0) { + return -1; + } + for (size_t i = 1; i <= MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; ++i) { + if (codePoint <= MAX_ENCODED_CODE_POINT_VALUES[i]) { + return i; + } + } + return -1; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/utils/utf8_utils.h b/native/dicttoolkit/src/utils/utf8_utils.h new file mode 100644 index 000000000..35818e56c --- /dev/null +++ b/native/dicttoolkit/src/utils/utf8_utils.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_UTF8_UTILS_H +#define LATINIME_DICT_TOOLKIT_UTF8_UTILS_H + +#include <cstdint> +#include <string> +#include <vector> + +#include "dict_toolkit_defines.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class Utf8Utils { +public: + static std::vector<int> getCodePoints(const std::string &utf8Str); + static std::string getUtf8String(const CodePointArrayView codePoints); + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Utf8Utils); + + // Values indexed by sequence size. + static const size_t MAX_SEQUENCE_SIZE_FOR_A_CODE_POINT; + static const uint8_t FIRST_BYTE_MARKER_MASKS[]; + static const uint8_t FIRST_BYTE_MARKERS[]; + static const uint8_t FIRST_BYTE_CODE_POINT_BITS_MASKS[]; + static const int MAX_ENCODED_CODE_POINT_VALUES[]; + + static const uint8_t TRAILING_BYTE_CODE_POINT_BITS_MASK; + static const uint8_t TRAILING_BYTE_MARKER; + static const size_t CODE_POINT_BIT_COUNT_IN_TRAILING_BYTE; + + static int getSequenceSizeByCheckingFirstByte(const uint8_t firstByte); + static int maskFirstByte(const uint8_t firstByte, const int encodeSize); + static int maskTrailingByte(const uint8_t secondOrLaterByte); + static int getSequenceSizeToEncodeCodePoint(const int codePoint); +}; +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_UTF8_UTILS_H diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp new file mode 100644 index 000000000..f2e24ab5f --- /dev/null +++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +const std::vector<int> getCodePointVector(const char *str) { + std::vector<int> codePoints; + while (*str) { + codePoints.push_back(*str); + ++str; + } + return codePoints; +} + +const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) { + return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>()); +} + +TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) { + OffdeviceIntermediateDict dict = OffdeviceIntermediateDict( + OffdeviceIntermediateDictHeader(OffdeviceIntermediateDictHeader::AttributeMap())); + EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView())); + + const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_TRUE(dict.addWord(wordProperty0)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + + const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh")); + EXPECT_TRUE(dict.addWord(wordProperty1)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + + const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab")); + EXPECT_TRUE(dict.addWord(wordProperty2)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + + const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg")); + EXPECT_TRUE(dict.addWord(wordProperty3)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + + const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef")); + EXPECT_TRUE(dict.addWord(wordProperty4)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + + const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef")); + EXPECT_TRUE(dict.addWord(wordProperty5)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); + + const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail."; + + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/command_utils_test.cpp b/native/dicttoolkit/tests/utils/command_utils_test.cpp new file mode 100644 index 000000000..9d79c9dd9 --- /dev/null +++ b/native/dicttoolkit/tests/utils/command_utils_test.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/command_utils.h" + +#include <gtest/gtest.h> + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(CommandUtilsTests, TestGetCommandType) { + EXPECT_EQ(CommandUtils::getCommandType(""), CommandType::Unknown); + EXPECT_EQ(CommandUtils::getCommandType("abc"), CommandType::Unknown); + EXPECT_EQ(CommandUtils::getCommandType("info"), CommandType::Info); + EXPECT_EQ(CommandUtils::getCommandType("diff"), CommandType::Diff); + EXPECT_EQ(CommandUtils::getCommandType("makedict"), CommandType::Makedict); + EXPECT_EQ(CommandUtils::getCommandType("header"), CommandType::Header); + EXPECT_EQ(CommandUtils::getCommandType("help"), CommandType::Help); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/tests/utils/utf8_utils_test.cpp b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp new file mode 100644 index 000000000..9c59a8b05 --- /dev/null +++ b/native/dicttoolkit/tests/utils/utf8_utils_test.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/utf8_utils.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +TEST(Utf8UtilsTests, TestGetCodePoints) { + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(""); + EXPECT_EQ(0u, codePoints.size()); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints("test"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ('t', codePoints[0]); + EXPECT_EQ('e', codePoints[1]); + EXPECT_EQ('s', codePoints[2]); + EXPECT_EQ('t', codePoints[3]); + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410"); + EXPECT_EQ(4u, codePoints.size()); + EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A + EXPECT_EQ('a', codePoints[1]); + EXPECT_EQ(0x03C2, codePoints[2]); // CYRILLIC CAPITAL LETTER A + EXPECT_EQ(0x0410, codePoints[3]); // GREEK SMALL LETTER FINAL SIGMA + } + { + const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752"); + EXPECT_EQ(3u, codePoints.size()); + EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE + EXPECT_EQ('?', codePoints[1]); + EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT + } + + // Redundant UTF-8 sequences must be rejected. + EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty()); + EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty()); +} + +TEST(Utf8UtilsTests, TestGetUtf8String) { + { + const std::vector<int> codePoints = {'t', 'e', 's', 't'}; + EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } + { + const std::vector<int> codePoints = { + 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */, + 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */, + 0x0430 /* CYRILLIC SMALL LETTER A */, + 0x3042 /* HIRAGANA LETTER A */, + 0x1F36A /* COOKIE */, + 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */ + }; + EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752", + Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); + } +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 118f600bb..9c065e0d1 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -35,6 +35,7 @@ #include "utils/int_array_view.h" #include "utils/jni_data_utils.h" #include "utils/log_utils.h" +#include "utils/profiler.h" #include "utils/time_keeper.h" namespace latinime { @@ -43,8 +44,8 @@ class ProximityInfo; static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { - PROF_OPEN; - PROF_START(66); + PROF_INIT; + PROF_TIMER_START(66); const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); if (sourceDirUtf8Length <= 0) { AKLOGE("DICT: Can't get sourceDir string"); @@ -63,8 +64,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s Dictionary *const dictionary = new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); - PROF_END(66); - PROF_CLOSE; + PROF_TIMER_END(66); return reinterpret_cast<jlong>(dictionary); } @@ -586,7 +586,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j } if (!dictionaryStructureWithBufferPolicy->addUnigramEntry( CodePointArrayView(wordCodePoints, wordCodePointCount), - wordProperty.getUnigramProperty())) { + &wordProperty.getUnigramProperty())) { LogUtils::logToJava(env, "Cannot add unigram to the new dict."); return false; } @@ -605,7 +605,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) { + for (const NgramProperty &ngramProperty : wordProperty.getNgramProperties()) { if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) { LogUtils::logToJava(env, "Cannot add ngram to the new dict."); return false; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 885118524..0e67b4d5a 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -23,10 +23,10 @@ #define AK_FORCE_INLINE inline #endif // __GNUC__ -#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#if defined(FLAG_DBG) #undef AK_FORCE_INLINE #define AK_FORCE_INLINE inline -#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#endif // defined(FLAG_DBG) // Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java #define MAX_WORD_LENGTH 48 @@ -172,69 +172,6 @@ static inline void showStackTrace() { #define INTS_TO_CHARS(input, length, output) #endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) -#ifdef FLAG_DO_PROFILE -// Profiler -#include <time.h> - -#define PROF_BUF_SIZE 100 -static float profile_buf[PROF_BUF_SIZE]; -static float profile_old[PROF_BUF_SIZE]; -static unsigned int profile_counter[PROF_BUF_SIZE]; - -#define PROF_RESET prof_reset() -#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] -#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0) -#define PROF_START(prof_buf_id) do { \ - PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0) -#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0) -#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) -#define PROF_CLOCKOUT(prof_buf_id) \ - AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) -#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0) - -static inline void prof_reset(void) { - for (int i = 0; i < PROF_BUF_SIZE; ++i) { - profile_buf[i] = 0; - profile_old[i] = 0; - profile_counter[i] = 0; - } -} - -static inline void prof_out(void) { - if (profile_counter[PROF_BUF_SIZE - 1] != 1) { - AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE."); - } - AKLOGI("Total time is %6.3f ms.", - profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); - float all = 0.0f; - for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - all += profile_buf[i]; - } - if (all < 1.0f) all = 1.0f; - for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - if (profile_buf[i] > 0.0f) { - AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", - i, (profile_buf[i] * 100.0f / all), - profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), - profile_counter[i]); - } - } -} - -#else // FLAG_DO_PROFILE -#define PROF_BUF_SIZE 0 -#define PROF_RESET -#define PROF_COUNT(prof_buf_id) -#define PROF_OPEN -#define PROF_START(prof_buf_id) -#define PROF_CLOSE -#define PROF_END(prof_buf_id) -#define PROF_CLOCK_OUT(prof_buf_id) -#define PROF_CLOCKOUT(prof_buf_id) -#define PROF_OUTALL - -#endif // FLAG_DO_PROFILE - #ifdef FLAG_DBG #define DEBUG_DICT true #define DEBUG_DICT_FULL false diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h index b5314faaa..d4db3f09f 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.h +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -23,6 +23,7 @@ #include "jni.h" #include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" +#include "utils/int_array_view.h" namespace latinime { @@ -33,10 +34,10 @@ class WordProperty { WordProperty() : mCodePoints(), mUnigramProperty(), mNgrams() {} - WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, - const std::vector<NgramProperty> *const ngrams) - : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), - mNgrams(*ngrams) {} + WordProperty(const std::vector<int> &&codePoints, const UnigramProperty &unigramProperty, + const std::vector<NgramProperty> &ngrams) + : mCodePoints(std::move(codePoints)), mUnigramProperty(unigramProperty), + mNgrams(ngrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, @@ -44,12 +45,16 @@ class WordProperty { jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; - const UnigramProperty *getUnigramProperty() const { - return &mUnigramProperty; + const CodePointArrayView getCodePoints() const { + return CodePointArrayView(mCodePoints); } - const std::vector<NgramProperty> *getNgramProperties() const { - return &mNgrams; + const UnigramProperty &getUnigramProperty() const { + return mUnigramProperty; + } + + const std::vector<NgramProperty> &getNgramProperties() const { + return mNgrams; } private: diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index c372d668b..e5e9b46bf 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -29,6 +29,7 @@ #include "suggest/core/result/suggestions_output_utils.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest_options.h" +#include "utils/profiler.h" namespace latinime { @@ -48,8 +49,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const { - PROF_OPEN; - PROF_START(0); + PROF_INIT; + PROF_TIMER_START(0); const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, @@ -57,8 +58,8 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, // TODO: Add the way to evaluate cache initializeSearch(tSession); - PROF_END(0); - PROF_START(1); + PROF_TIMER_END(0); + PROF_TIMER_START(1); // keep expanding search dicNodes until all have terminated. while (tSession->getDicTraverseCache()->activeSize() > 0) { @@ -66,12 +67,11 @@ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, tSession->getDicTraverseCache()->advanceActiveDicNodes(); tSession->getDicTraverseCache()->advanceInputIndex(inputSize); } - PROF_END(1); - PROF_START(2); + PROF_TIMER_END(1); + PROF_TIMER_START(2); SuggestionsOutputUtils::outputSuggestions( SCORING, tSession, weightOfLangModelVsSpatialModel, outSuggestionResults); - PROF_END(2); - PROF_CLOSE; + PROF_TIMER_END(2); } /** diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 9455222dd..ca7d93b0e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -614,7 +614,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 59873612a..1a51acad5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -478,7 +478,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(), ptNodeParams.getProbability(), HistoricalInfo(), std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 1992d4a5a..7449cd02b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -560,7 +560,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(), wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(), *historicalInfo, std::move(shortcuts)); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); + return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index 408373176..e0f671056 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -133,6 +133,29 @@ class IntArrayView { return std::vector<int>(begin(), end()); } + std::vector<IntArrayView> split(const int separator, const int limit = S_INT_MAX) const { + if (limit <= 0) { + return std::vector<IntArrayView>(); + } + std::vector<IntArrayView> result; + if (limit == 1) { + result.emplace_back(mPtr, mSize); + return result; + } + size_t startIndex = 0; + for (size_t i = 0; i < mSize; ++i) { + if (mPtr[i] == separator) { + result.emplace_back(mPtr + startIndex, i - startIndex); + startIndex = i + 1; + if (result.size() >= static_cast<size_t>(limit - 1)) { + break; + } + } + } + result.emplace_back(mPtr + startIndex, mSize - startIndex); + return result; + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); diff --git a/native/jni/src/utils/profiler.h b/native/jni/src/utils/profiler.h new file mode 100644 index 000000000..5f107fed3 --- /dev/null +++ b/native/jni/src/utils/profiler.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PROFILER_H +#define LATINIME_PROFILER_H + +#ifdef FLAG_DO_PROFILE + +#include "defines.h" + +#include <ctime> +#include <unordered_map> + +namespace latinime { + +class Profiler final { + public: + Profiler(const clockid_t clockId) + : mClockId(clockId), mStartTime(getTimeInMicroSec()), mStartTimes(), mTimes(), + mCounters() {} + + ~Profiler() { + const float totalTime = + static_cast<float>(getTimeInMicroSec() - mStartTime) / 1000.f; + AKLOGI("Total time is %6.3f ms.", totalTime); + for (const auto &time : mTimes) { + AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", time.first, + time.second / totalTime * 100.0f, time.second, mCounters[time.first]); + } + } + + void startTimer(const int id) { + mStartTimes[id] = getTimeInMicroSec(); + } + + void endTimer(const int id) { + mTimes[id] += static_cast<float>(getTimeInMicroSec() - mStartTimes[id]) / 1000.0f; + mCounters[id]++; + } + + operator bool() const { return false; } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Profiler); + + const clockid_t mClockId; + int64_t mStartTime; + std::unordered_map<int, int64_t> mStartTimes; + std::unordered_map<int, float> mTimes; + std::unordered_map<int, int> mCounters; + + int64_t getTimeInMicroSec() { + timespec time; + clock_gettime(mClockId, &time); + return static_cast<int64_t>(time.tv_sec) * 1000000 + + static_cast<int64_t>(time.tv_nsec) / 1000; + } +}; +} // namespace latinime + +#define PROF_INIT Profiler __LATINIME__PROFILER__(CLOCK_THREAD_CPUTIME_ID) +#define PROF_TIMER_START(timer_id) __LATINIME__PROFILER__.startTimer(timer_id) +#define PROF_TIMER_END(timer_id) __LATINIME__PROFILER__.endTimer(timer_id) + +#else // FLAG_DO_PROFILE + +#define PROF_INIT +#define PROF_TIMER_START(timer_id) +#define PROF_TIMER_END(timer_id) + +#endif // FLAG_DO_PROFILE + +#endif /* LATINIME_PROFILER_H */ diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp index 4757a416b..2fce633f5 100644 --- a/native/jni/tests/utils/int_array_view_test.cpp +++ b/native/jni/tests/utils/int_array_view_test.cpp @@ -151,5 +151,52 @@ TEST(IntArrayViewTest, TestToVector) { EXPECT_EQ(std::vector<int>(), CodePointArrayView().toVector()); } +TEST(IntArrayViewTest, TestSplit) { + EXPECT_TRUE(IntArrayView().split(0, 0).empty()); + { + const auto intArrayViews = IntArrayView().split(0, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + { + const auto intArrayViews = IntArrayView().split(0, 100); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_TRUE(intArrayViews[0].empty()); + } + + const std::vector<int> intVector = {1, 2, 3, 3, 2, 3}; + const IntArrayView intArrayView(intVector); + { + const auto intArrayViews = intArrayView.split(2); + EXPECT_EQ(3u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3}), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({3}), intArrayViews[2].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 2); + EXPECT_EQ(2u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>({3, 3, 2, 3}), intArrayViews[1].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 1); + EXPECT_EQ(1u, intArrayViews.size()); + EXPECT_EQ(intVector, intArrayViews[0].toVector()); + } + { + const auto intArrayViews = intArrayView.split(2, 0); + EXPECT_EQ(0u, intArrayViews.size()); + } + { + const auto intArrayViews = intArrayView.split(3); + EXPECT_EQ(4u, intArrayViews.size()); + EXPECT_EQ(std::vector<int>({1, 2}), intArrayViews[0].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[1].toVector()); + EXPECT_EQ(std::vector<int>({2}), intArrayViews[2].toVector()); + EXPECT_EQ(std::vector<int>(), intArrayViews[3].toVector()); + } +} + } // namespace } // namespace latinime |