diff options
Diffstat (limited to 'native/jni/src')
56 files changed, 499 insertions, 389 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 1719b1c60..3becc79e8 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -35,7 +35,13 @@ // Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java #define MAX_PROXIMITY_CHARS_SIZE 16 #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 -#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + +// TODO: Use size_t instead of int. +// Disclaimer: You will see a compile error if you use this macro against a variable-length array. +// Sorry for the inconvenience. It isn't supported. +template <typename T, int N> +char (&ArraySizeHelper(T (&array)[N]))[N]; +#define NELEMS(x) (sizeof(ArraySizeHelper(x))) AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize, char *dest, const int destSize) { diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h index 089d4467f..fb76c731f 100644 --- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h +++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h @@ -18,7 +18,6 @@ #define LATINIME_DIC_NODES_CACHE_H #include <algorithm> -#include <stdint.h> #include "defines.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 6ddb7f1af..11f8c2905 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -17,7 +17,7 @@ #ifndef LATINIME_DIC_NODE_PROPERTIES_H #define LATINIME_DIC_NODE_PROPERTIES_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h index ea48de1ea..69a886f55 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h @@ -18,8 +18,8 @@ #define LATINIME_DIC_NODE_STATE_OUTPUT_H #include <algorithm> +#include <cstdint> #include <cstring> // for memmove() -#include <stdint.h> #include "defines.h" diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index f164edbee..c19d48eb9 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -18,7 +18,7 @@ #define LATINIME_DIC_NODE_STATE_SCORING_H #include <algorithm> -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/core/dictionary/digraph_utils.h" diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.cpp b/native/jni/src/suggest/core/dictionary/bloom_filter.cpp deleted file mode 100644 index 4ae474e0c..000000000 --- a/native/jni/src/suggest/core/dictionary/bloom_filter.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/core/dictionary/bloom_filter.h" - -namespace latinime { - -// Must be smaller than BIGRAM_FILTER_BYTE_SIZE * 8, and preferably prime. 1021 is the largest -// prime under 128 * 8. -const int BloomFilter::BIGRAM_FILTER_MODULO = 1021; - -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h index e22c3ae5c..1e60f49ed 100644 --- a/native/jni/src/suggest/core/dictionary/bloom_filter.h +++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h @@ -17,8 +17,7 @@ #ifndef LATINIME_BLOOM_FILTER_H #define LATINIME_BLOOM_FILTER_H -#include <cstring> -#include <stdint.h> +#include <bitset> #include "defines.h" @@ -34,41 +33,37 @@ namespace latinime { // Total 148603.14 (sum of others 148579.90) class BloomFilter { public: - BloomFilter() { - ASSERT(BIGRAM_FILTER_BYTE_SIZE * 8 >= BIGRAM_FILTER_MODULO); - memset(mFilter, 0, sizeof(mFilter)); - } + BloomFilter() : mFilter() {} - // TODO: uint32_t position - AK_FORCE_INLINE void setInFilter(const int32_t position) { - const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); - mFilter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7)); + AK_FORCE_INLINE void setInFilter(const int position) { + mFilter.set(getIndex(position)); } - // TODO: uint32_t position - AK_FORCE_INLINE bool isInFilter(const int32_t position) const { - const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); - return (mFilter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7))) != 0; + AK_FORCE_INLINE bool isInFilter(const int position) const { + return mFilter.test(getIndex(position)); } private: DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter); - // Size, in bytes, of the bloom filter index for bigrams - // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, + AK_FORCE_INLINE size_t getIndex(const int position) const { + return static_cast<size_t>(position) % BIGRAM_FILTER_MODULO; + } + + // Size, in bits, of the bloom filter index for bigrams + // The probability of false positive is (1 - e ** (-kn/m))**k, // where k is the number of hash functions, n the number of bigrams, and m the number of // bits we can test. - // At the moment 100 is the maximum number of bigrams for a word with the current + // At the moment 100 is the maximum number of bigrams for a word with the current main // dictionaries, so n = 100. 1024 buckets give us m = 1024. // With 1 hash function, our false positive rate is about 9.3%, which should be enough for // our uses since we are only using this to increase average performance. For the record, // k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%, // and m = 4096 gives 2.4%. - // This is assigned here because it is used for array size. - static const int BIGRAM_FILTER_BYTE_SIZE = 128; - static const int BIGRAM_FILTER_MODULO; - - uint8_t mFilter[BIGRAM_FILTER_BYTE_SIZE]; + // This is assigned here because it is used for bitset size. + // 1021 is the largest prime under 1024. + static const size_t BIGRAM_FILTER_MODULO = 1021; + std::bitset<BIGRAM_FILTER_MODULO> mFilter; }; } // namespace latinime #endif // LATINIME_BLOOM_FILTER_H diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index ef7a0a8fe..8445da310 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -18,8 +18,6 @@ #include "suggest/core/dictionary/dictionary.h" -#include <stdint.h> - #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/result/suggestion_results.h" diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index cd983b032..6b7756565 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -17,13 +17,12 @@ #ifndef LATINIME_DICTIONARY_H #define LATINIME_DICTIONARY_H -#include <stdint.h> #include <memory> #include "defines.h" #include "jni.h" #include "suggest/core/dictionary/bigram_dictionary.h" -#include "suggest/core/dictionary/word_property.h" +#include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/suggest_interface.h" diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h index cc7786765..0e8e5b635 100644 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.h +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_ERROR_TYPE_UTILS_H #define LATINIME_ERROR_TYPE_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/bigram_property.h new file mode 100644 index 000000000..8d3429b5b --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/property/bigram_property.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_PROPERTY_H +#define LATINIME_BIGRAM_PROPERTY_H + +#include <vector> + +#include "defines.h" + +namespace latinime { + +class BigramProperty { + public: + BigramProperty(const std::vector<int> *const targetCodePoints, + const int probability, const int timestamp, const int level, const int count) + : mTargetCodePoints(*targetCodePoints), mProbability(probability), + mTimestamp(timestamp), mLevel(level), mCount(count) {} + + const std::vector<int> *getTargetCodePoints() const { + return &mTargetCodePoints; + } + + int getProbability() const { + return mProbability; + } + + int getTimestamp() const { + return mTimestamp; + } + + int getLevel() const { + return mLevel; + } + + int getCount() const { + return mCount; + } + + private: + // Default copy constructor and assign operator are used for using in std::vector. + DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty); + + // TODO: Make members const. + std::vector<int> mTargetCodePoints; + int mProbability; + int mTimestamp; + int mLevel; + int mCount; +}; +} // namespace latinime +#endif // LATINIME_WORD_PROPERTY_H diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h new file mode 100644 index 000000000..d2551057b --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_UNIGRAM_PROPERTY_H +#define LATINIME_UNIGRAM_PROPERTY_H + +#include <vector> + +#include "defines.h" + +namespace latinime { + +class UnigramProperty { + public: + class ShortcutProperty { + public: + ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability) + : mTargetCodePoints(*targetCodePoints), mProbability(probability) {} + + const std::vector<int> *getTargetCodePoints() const { + return &mTargetCodePoints; + } + + int getProbability() const { + return mProbability; + } + + private: + // Default copy constructor and assign operator are used for using in std::vector. + DISALLOW_DEFAULT_CONSTRUCTOR(ShortcutProperty); + + // TODO: Make members const. + std::vector<int> mTargetCodePoints; + int mProbability; + }; + + UnigramProperty() + : mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY), + mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {} + + UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability, + const int timestamp, const int level, const int count, + const std::vector<ShortcutProperty> *const shortcuts) + : mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), + mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {} + + bool isNotAWord() const { + return mIsNotAWord; + } + + bool isBlacklisted() const { + return mIsBlacklisted; + } + + bool hasShortcuts() const { + return !mShortcuts.empty(); + } + + int getProbability() const { + return mProbability; + } + + int getTimestamp() const { + return mTimestamp; + } + + int getLevel() const { + return mLevel; + } + + int getCount() const { + return mCount; + } + + const std::vector<ShortcutProperty> &getShortcuts() const { + return mShortcuts; + } + + private: + // Default copy constructor is used for using as a return value. + DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty); + + // TODO: Make members const. + bool mIsNotAWord; + bool mIsBlacklisted; + int mProbability; + // Historical information + int mTimestamp; + int mLevel; + int mCount; + std::vector<ShortcutProperty> mShortcuts; +}; +} // namespace latinime +#endif // LATINIME_UNIGRAM_PROPERTY_H diff --git a/native/jni/src/suggest/core/dictionary/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp index 473311842..95608dcf8 100644 --- a/native/jni/src/suggest/core/dictionary/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/core/dictionary/word_property.h" +#include "suggest/core/dictionary/property/word_property.h" namespace latinime { @@ -23,9 +23,12 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jobject outBigramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const { env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]); - jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts}; + + jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), + !mBigrams.empty(), mUnigramProperty.hasShortcuts()}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); - int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount}; + int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(), + mUnigramProperty.getLevel(), mUnigramProperty.getCount()}; env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo), probabilityInfo); @@ -35,19 +38,17 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); // Output bigrams. - const int bigramCount = mBigrams.size(); - for (int i = 0; i < bigramCount; ++i) { - const BigramProperty *const bigramProperty = &mBigrams[i]; - const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints(); + for (const auto &bigramProperty : mBigrams) { + const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */, - word1CodePoints->size(), &word1CodePoints->at(0)); + word1CodePoints->size(), word1CodePoints->data()); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); - int bigramProbabilityInfo[] = {bigramProperty->getProbability(), - bigramProperty->getTimestamp(), bigramProperty->getLevel(), - bigramProperty->getCount()}; + int bigramProbabilityInfo[] = {bigramProperty.getProbability(), + bigramProperty.getTimestamp(), bigramProperty.getLevel(), + bigramProperty.getCount()}; jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); @@ -56,16 +57,15 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, } // Output shortcuts. - const int shortcutTargetCount = mShortcuts.size(); - for (int i = 0; i < shortcutTargetCount; ++i) { - const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints(); + for (const auto &shortcut : mUnigramProperty.getShortcuts()) { + const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints(); jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */, - targetCodePoints->size(), &targetCodePoints->at(0)); + targetCodePoints->size(), targetCodePoints->data()); env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); env->DeleteLocalRef(shortcutTargetCodePointArray); jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, - mShortcuts[i].getProbability()); + shortcut.getProbability()); env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability); env->DeleteLocalRef(integerProbability); } diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h new file mode 100644 index 000000000..5519a917c --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORD_PROPERTY_H +#define LATINIME_WORD_PROPERTY_H + +#include <vector> + +#include "defines.h" +#include "jni.h" +#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/unigram_property.h" + +namespace latinime { + +// This class is used for returning information belonging to a word to java side. +class WordProperty { + public: + // Default constructor is used to create an instance that indicates an invalid word. + WordProperty() + : mCodePoints(), mUnigramProperty(), mBigrams() {} + + WordProperty(const std::vector<int> *const codePoints, + const UnigramProperty *const unigramProperty, + const std::vector<BigramProperty> *const bigrams) + : mCodePoints(*codePoints), mUnigramProperty(*unigramProperty), mBigrams(*bigrams) {} + + void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, + jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, + jobject outShortcutTargets, jobject outShortcutProbabilities) const; + + private: + // Default copy constructor is used for using as a return value. + DISALLOW_ASSIGNMENT_OPERATOR(WordProperty); + + const std::vector<int> mCodePoints; + const UnigramProperty mUnigramProperty; + const std::vector<BigramProperty> mBigrams; +}; +} // namespace latinime +#endif // LATINIME_WORD_PROPERTY_H diff --git a/native/jni/src/suggest/core/dictionary/word_property.h b/native/jni/src/suggest/core/dictionary/word_property.h deleted file mode 100644 index 40b1a91a4..000000000 --- a/native/jni/src/suggest/core/dictionary/word_property.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_WORD_PROPERTY_H -#define LATINIME_WORD_PROPERTY_H - -#include <cstring> -#include <vector> - -#include "defines.h" -#include "jni.h" - -namespace latinime { - -// This class is used for returning information belonging to a word to java side. -class WordProperty { - public: - class BigramProperty { - public: - BigramProperty(const std::vector<int> *const targetCodePoints, - const int probability, const int timestamp, const int level, const int count) - : mTargetCodePoints(*targetCodePoints), mProbability(probability), - mTimestamp(timestamp), mLevel(level), mCount(count) {} - - const std::vector<int> *getTargetCodePoints() const { - return &mTargetCodePoints; - } - - int getProbability() const { - return mProbability; - } - - int getTimestamp() const { - return mTimestamp; - } - - int getLevel() const { - return mLevel; - } - - int getCount() const { - return mCount; - } - - private: - std::vector<int> mTargetCodePoints; - int mProbability; - int mTimestamp; - int mLevel; - int mCount; - }; - - class ShortcutProperty { - public: - ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability) - : mTargetCodePoints(*targetCodePoints), mProbability(probability) {} - - const std::vector<int> *getTargetCodePoints() const { - return &mTargetCodePoints; - } - - int getProbability() const { - return mProbability; - } - - private: - std::vector<int> mTargetCodePoints; - int mProbability; - }; - - // Invalid word. - WordProperty() - : mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false), - mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY), - mTimestamp(0), mLevel(0), mCount(0), mBigrams(), mShortcuts() {} - - WordProperty(const std::vector<int> *const codePoints, - const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams, - const bool hasShortcuts, const int probability, const int timestamp, - const int level, const int count, const std::vector<BigramProperty> *const bigrams, - const std::vector<ShortcutProperty> *const shortcuts) - : mCodePoints(*codePoints), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), - mHasBigrams(hasBigrams), mHasShortcuts(hasShortcuts), mProbability(probability), - mTimestamp(timestamp), mLevel(level), mCount(count), mBigrams(*bigrams), - mShortcuts(*shortcuts) {} - - void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, - jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, - jobject outShortcutTargets, jobject outShortcutProbabilities) const; - - private: - DISALLOW_ASSIGNMENT_OPERATOR(WordProperty); - - std::vector<int> mCodePoints; - bool mIsNotAWord; - bool mIsBlacklisted; - bool mHasBigrams; - bool mHasShortcuts; - int mProbability; - // Historical information - int mTimestamp; - int mLevel; - int mCount; - std::vector<BigramProperty> mBigrams; - std::vector<ShortcutProperty> mShortcuts; -}; -} // namespace latinime -#endif // LATINIME_WORD_PROPERTY_H diff --git a/native/jni/src/suggest/core/layout/normal_distribution.h b/native/jni/src/suggest/core/layout/normal_distribution.h new file mode 100644 index 000000000..5f21a59c0 --- /dev/null +++ b/native/jni/src/suggest/core/layout/normal_distribution.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_NORMAL_DISTRIBUTION_H +#define LATINIME_NORMAL_DISTRIBUTION_H + +#include <cmath> + +#include "defines.h" + +namespace latinime { + +// Normal distribution N(u, sigma^2). +class NormalDistribution { + public: + NormalDistribution(const float u, const float sigma) + : mU(u), + mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F + * GeometryUtils::SQUARE_FLOAT(sigma))), + mPreComputedExponentPart(-1.0f / (2.0f * GeometryUtils::SQUARE_FLOAT(sigma))) {} + + float getProbabilityDensity(const float x) const { + const float shiftedX = x - mU; + return mPreComputedNonExpPart + * expf(mPreComputedExponentPart * GeometryUtils::SQUARE_FLOAT(shiftedX)); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution); + + const float mU; // mean value + const float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2) + const float mPreComputedExponentPart; // = -1 / (2 * sigma^2) +}; +} // namespace latinime +#endif // LATINIME_NORMAL_DISTRIBUTION_H diff --git a/native/jni/src/suggest/core/layout/normal_distribution_2d.h b/native/jni/src/suggest/core/layout/normal_distribution_2d.h new file mode 100644 index 000000000..3bc0a0153 --- /dev/null +++ b/native/jni/src/suggest/core/layout/normal_distribution_2d.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_NORMAL_DISTRIBUTION_2D_H +#define LATINIME_NORMAL_DISTRIBUTION_2D_H + +#include <cmath> + +#include "defines.h" +#include "suggest/core/layout/geometry_utils.h" +#include "suggest/core/layout/normal_distribution.h" + +namespace latinime { + +// Normal distribution on a 2D plane. The covariance is always zero, but the distribution can be +// rotated. +class NormalDistribution2D { + public: + NormalDistribution2D(const float uX, const float sigmaX, const float uY, const float sigmaY, + const float theta) + : mXDistribution(0.0f, sigmaX), mYDistribution(0.0f, sigmaY), mUX(uX), mUY(uY), + mSinTheta(sinf(theta)), mCosTheta(cosf(theta)) {} + + float getProbabilityDensity(const float x, const float y) const { + // Shift + const float shiftedX = x - mUX; + const float shiftedY = y - mUY; + // Rotate + const float rotatedShiftedX = mCosTheta * shiftedX + mSinTheta * shiftedY; + const float rotatedShiftedY = -mSinTheta * shiftedX + mCosTheta * shiftedY; + return mXDistribution.getProbabilityDensity(rotatedShiftedX) + * mYDistribution.getProbabilityDensity(rotatedShiftedY); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution2D); + + const NormalDistribution mXDistribution; + const NormalDistribution mYDistribution; + const float mUX; + const float mUY; + const float mSinTheta; + const float mCosTheta; +}; +} // namespace latinime +#endif // LATINIME_NORMAL_DISTRIBUTION_2D_H diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.cpp b/native/jni/src/suggest/core/layout/proximity_info_params.cpp index 597518a4c..68bb0ae9d 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_params.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_params.cpp @@ -24,9 +24,6 @@ const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE = 1.0f; const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE_G = 0.5f; /* Per method constants */ -// Used by ProximityInfoStateUtils::initGeometricDistanceInfos() -const float ProximityInfoParams::NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f; - // Used by ProximityInfoStateUtils::updateNearKeysDistances() const float ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_DISTANCE = 2.0f; @@ -50,7 +47,7 @@ const int ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION = 2; const int ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE = 4; // Used by ProximityInfoStateUtils::updateAlignPointProbabilities() -const float ProximityInfoParams::MIN_PROBABILITY = 0.000001f; +const float ProximityInfoParams::MIN_PROBABILITY = 0.000005f; const float ProximityInfoParams::MAX_SKIP_PROBABILITY = 0.95f; const float ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY = 0.01f; const float ProximityInfoParams::SKIP_LAST_POINT_PROBABILITY = 0.1f; @@ -76,8 +73,12 @@ const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION = 0 const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION = 0.5f; const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION = 0.15f; const float ProximityInfoParams::MIN_STANDARD_DEVIATION = 0.37f; -const float ProximityInfoParams::PREV_DISTANCE_WEIGHT = 0.5f; -const float ProximityInfoParams::NEXT_DISTANCE_WEIGHT = 0.6f; +const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST = 1.25f; +const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST = 0.85f; +const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_LAST = 1.4f; +const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST = 0.95f; +const float ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT = 1.1f; +const float ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT = 0.95f; // Used by ProximityInfoStateUtils::suppressCharProbabilities() const float ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT = 1.5f; diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.h b/native/jni/src/suggest/core/layout/proximity_info_params.h index ae1f82c22..d9515c837 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_params.h +++ b/native/jni/src/suggest/core/layout/proximity_info_params.h @@ -28,9 +28,6 @@ class ProximityInfoParams { static const float VERTICAL_SWEET_SPOT_SCALE; static const float VERTICAL_SWEET_SPOT_SCALE_G; - // Used by ProximityInfoStateUtils::initGeometricDistanceInfos() - static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD; - // Used by ProximityInfoStateUtils::updateNearKeysDistances() static const float NEAR_KEY_THRESHOLD_FOR_DISTANCE; @@ -78,8 +75,13 @@ class ProximityInfoParams { static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION; static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION; static const float MIN_STANDARD_DEVIATION; - static const float PREV_DISTANCE_WEIGHT; - static const float NEXT_DISTANCE_WEIGHT; + // X means gesture's direction. Y means gesture's orthogonal direction. + static const float STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST; + static const float STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST; + static const float STANDARD_DEVIATION_X_WEIGHT_FOR_LAST; + static const float STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST; + static const float STANDARD_DEVIATION_X_WEIGHT; + static const float STANDARD_DEVIATION_Y_WEIGHT; // Used by ProximityInfoStateUtils::suppressCharProbabilities() static const float SUPPRESSION_LENGTH_WEIGHT; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index 2919904e5..e585f9088 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -91,7 +91,6 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi mSampledInputIndice.clear(); mSampledLengthCache.clear(); mSampledNormalizedSquaredLengthCache.clear(); - mSampledNearKeySets.clear(); mSampledSearchKeySets.clear(); mSpeedRates.clear(); mBeelineSpeedPercentiles.clear(); @@ -126,18 +125,17 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi if (mSampledInputSize > 0) { ProximityInfoStateUtils::initGeometricDistanceInfos(mProximityInfo, mSampledInputSize, lastSavedInputSize, isGeometric, &mSampledInputXs, &mSampledInputYs, - &mSampledNearKeySets, &mSampledNormalizedSquaredLengthCache); + &mSampledNormalizedSquaredLengthCache); if (isGeometric) { // updates probabilities of skipping or mapping each key for all points. ProximityInfoStateUtils::updateAlignPointProbabilities( mMaxPointToKeyLength, mProximityInfo->getMostCommonKeyWidth(), mProximityInfo->getKeyCount(), lastSavedInputSize, mSampledInputSize, &mSampledInputXs, &mSampledInputYs, &mSpeedRates, &mSampledLengthCache, - &mSampledNormalizedSquaredLengthCache, &mSampledNearKeySets, - &mCharProbabilities); + &mSampledNormalizedSquaredLengthCache, mProximityInfo, &mCharProbabilities); ProximityInfoStateUtils::updateSampledSearchKeySets(mProximityInfo, mSampledInputSize, lastSavedInputSize, &mSampledLengthCache, - &mSampledNearKeySets, &mSampledSearchKeySets, + &mCharProbabilities, &mSampledSearchKeySets, &mSampledSearchKeyVectors); mMostProbableStringProbability = ProximityInfoStateUtils::getMostProbableString( mProximityInfo, mSampledInputSize, &mCharProbabilities, mMostProbableString); diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index e253d9550..d66121b74 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -50,9 +50,9 @@ class ProximityInfoState { mSampledInputXs(), mSampledInputYs(), mSampledTimes(), mSampledInputIndice(), mSampledLengthCache(), mBeelineSpeedPercentiles(), mSampledNormalizedSquaredLengthCache(), mSpeedRates(), mDirections(), - mCharProbabilities(), mSampledNearKeySets(), mSampledSearchKeySets(), - mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false), - mSampledInputSize(0), mMostProbableStringProbability(0.0f) { + mCharProbabilities(), mSampledSearchKeySets(), mSampledSearchKeyVectors(), + mTouchPositionCorrectionEnabled(false), mSampledInputSize(0), + mMostProbableStringProbability(0.0f) { memset(mInputProximities, 0, sizeof(mInputProximities)); memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); memset(mMostProbableString, 0, sizeof(mMostProbableString)); @@ -216,10 +216,6 @@ class ProximityInfoState { std::vector<float> mDirections; // probabilities of skipping or mapping to a key for each point. std::vector<hash_map_compat<int, float> > mCharProbabilities; - // The vector for the key code set which holds nearby keys for each sampled input point - // 1. Used to calculate the probability of the key - // 2. Used to calculate mSampledSearchKeySets - std::vector<ProximityInfoStateUtils::NearKeycodesSet> mSampledNearKeySets; // The vector for the key code set which holds nearby keys of some trailing sampled input points // for each sampled input point. These nearby keys contain the next characters which can be in // the dictionary. Specifically, currently we are looking for keys nearby trailing sampled diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp index 867f59843..72bb68fc4 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp @@ -24,6 +24,7 @@ #include "defines.h" #include "suggest/core/layout/geometry_utils.h" +#include "suggest/core/layout/normal_distribution_2d.h" #include "suggest/core/layout/proximity_info.h" #include "suggest/core/layout/proximity_info_params.h" @@ -187,13 +188,10 @@ namespace latinime { const int lastSavedInputSize, const bool isGeometric, const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs, - std::vector<NearKeycodesSet> *sampledNearKeySets, std::vector<float> *sampledNormalizedSquaredLengthCache) { - sampledNearKeySets->resize(sampledInputSize); const int keyCount = proximityInfo->getKeyCount(); sampledNormalizedSquaredLengthCache->resize(sampledInputSize * keyCount); for (int i = lastSavedInputSize; i < sampledInputSize; ++i) { - (*sampledNearKeySets)[i].reset(); for (int k = 0; k < keyCount; ++k) { const int index = i * keyCount + k; const int x = (*sampledInputXs)[i]; @@ -202,10 +200,6 @@ namespace latinime { proximityInfo->getNormalizedSquaredDistanceFromCenterFloatG( k, x, y, isGeometric); (*sampledNormalizedSquaredLengthCache)[index] = normalizedSquaredDistance; - if (normalizedSquaredDistance - < ProximityInfoParams::NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) { - (*sampledNearKeySets)[i][k] = true; - } } } } @@ -625,7 +619,7 @@ namespace latinime { const std::vector<float> *const sampledSpeedRates, const std::vector<int> *const sampledLengthCache, const std::vector<float> *const sampledNormalizedSquaredLengthCache, - std::vector<NearKeycodesSet> *sampledNearKeySets, + const ProximityInfo *const proximityInfo, std::vector<hash_map_compat<int, float> > *charProbabilities) { charProbabilities->resize(sampledInputSize); // Calculates probabilities of using a point as a correlated point with the character @@ -641,12 +635,10 @@ namespace latinime { float nearestKeyDistance = static_cast<float>(MAX_VALUE_FOR_WEIGHTING); for (int j = 0; j < keyCount; ++j) { - if ((*sampledNearKeySets)[i].test(j)) { - const float distance = getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j); - if (distance < nearestKeyDistance) { - nearestKeyDistance = distance; - } + const float distance = getPointToKeyByIdLength( + maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j); + if (distance < nearestKeyDistance) { + nearestKeyDistance = distance; } } @@ -708,93 +700,57 @@ namespace latinime { // (1.0f - skipProbability). const float inputCharProbability = 1.0f - skipProbability; - const float speedxAngleRate = std::min(speedRate * currentAngle / M_PI_F + const float speedMultipliedByAngleRate = std::min(speedRate * currentAngle / M_PI_F * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION, ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION); - const float speedxNearestKeyDistanceRate = std::min(speedRate * nearestKeyDistance - * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION, - ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION); - const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate - + ProximityInfoParams::MIN_STANDARD_DEVIATION; - - ProximityInfoUtils::NormalDistribution - distribution(ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma); + const float speedMultipliedByNearestKeyDistanceRate = std::min( + speedRate * nearestKeyDistance + * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION, + ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION); + const float sigma = (speedMultipliedByAngleRate + speedMultipliedByNearestKeyDistanceRate + + ProximityInfoParams::MIN_STANDARD_DEVIATION) * mostCommonKeyWidth; + float theta = 0.0f; + // TODO: Use different metrics to compute sigmas. + float sigmaX = sigma; + float sigmaY = sigma; + if (i == 0 && i != sampledInputSize - 1) { + // First point + theta = getDirection(sampledInputXs, sampledInputYs, i + 1, i); + sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_FIRST; + sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_FIRST; + } else { + if (i == sampledInputSize - 1) { + // Last point + sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT_FOR_LAST; + sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT_FOR_LAST; + } else { + sigmaX *= ProximityInfoParams::STANDARD_DEVIATION_X_WEIGHT; + sigmaY *= ProximityInfoParams::STANDARD_DEVIATION_Y_WEIGHT; + } + theta = getDirection(sampledInputXs, sampledInputYs, i, i - 1); + } + NormalDistribution2D distribution((*sampledInputXs)[i], sigmaX, (*sampledInputYs)[i], + sigmaY, theta); // Summing up probability densities of all near keys. float sumOfProbabilityDensities = 0.0f; for (int j = 0; j < keyCount; ++j) { - if ((*sampledNearKeySets)[i].test(j)) { - float distance = sqrtf(getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j)); - if (i == 0 && i != sampledInputSize - 1) { - // For the first point, weighted average of distances from first point and the - // next point to the key is used as a point to key distance. - const float nextDistance = sqrtf(getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, - i + 1, j)); - if (nextDistance < distance) { - // The distance of the first point tends to bigger than continuing - // points because the first touch by the user can be sloppy. - // So we promote the first point if the distance of that point is larger - // than the distance of the next point. - distance = (distance - + nextDistance * ProximityInfoParams::NEXT_DISTANCE_WEIGHT) - / (1.0f + ProximityInfoParams::NEXT_DISTANCE_WEIGHT); - } - } else if (i != 0 && i == sampledInputSize - 1) { - // For the first point, weighted average of distances from last point and - // the previous point to the key is used as a point to key distance. - const float previousDistance = sqrtf(getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, - i - 1, j)); - if (previousDistance < distance) { - // The distance of the last point tends to bigger than continuing points - // because the last touch by the user can be sloppy. So we promote the - // last point if the distance of that point is larger than the distance of - // the previous point. - distance = (distance - + previousDistance * ProximityInfoParams::PREV_DISTANCE_WEIGHT) - / (1.0f + ProximityInfoParams::PREV_DISTANCE_WEIGHT); - } - } - // TODO: Promote the first point when the extended line from the next input is near - // from a key. Also, promote the last point as well. - sumOfProbabilityDensities += distribution.getProbabilityDensity(distance); - } + sumOfProbabilityDensities += distribution.getProbabilityDensity( + proximityInfo->getKeyCenterXOfKeyIdG(j, + NOT_A_COORDINATE /* referencePointX */, true /* isGeometric */), + proximityInfo->getKeyCenterYOfKeyIdG(j, + NOT_A_COORDINATE /* referencePointY */, true /* isGeometric */)); } // Split the probability of an input point to keys that are close to the input point. for (int j = 0; j < keyCount; ++j) { - if ((*sampledNearKeySets)[i].test(j)) { - float distance = sqrtf(getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, i, j)); - if (i == 0 && i != sampledInputSize - 1) { - // For the first point, weighted average of distances from the first point and - // the next point to the key is used as a point to key distance. - const float prevDistance = sqrtf(getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, - i + 1, j)); - if (prevDistance < distance) { - distance = (distance - + prevDistance * ProximityInfoParams::NEXT_DISTANCE_WEIGHT) - / (1.0f + ProximityInfoParams::NEXT_DISTANCE_WEIGHT); - } - } else if (i != 0 && i == sampledInputSize - 1) { - // For the first point, weighted average of distances from last point and - // the previous point to the key is used as a point to key distance. - const float prevDistance = sqrtf(getPointToKeyByIdLength( - maxPointToKeyLength, sampledNormalizedSquaredLengthCache, keyCount, - i - 1, j)); - if (prevDistance < distance) { - distance = (distance - + prevDistance * ProximityInfoParams::PREV_DISTANCE_WEIGHT) - / (1.0f + ProximityInfoParams::PREV_DISTANCE_WEIGHT); - } - } - const float probabilityDensity = distribution.getProbabilityDensity(distance); - const float probability = inputCharProbability * probabilityDensity - / sumOfProbabilityDensities; - (*charProbabilities)[i][j] = probability; - } + const float probabilityDensity = distribution.getProbabilityDensity( + proximityInfo->getKeyCenterXOfKeyIdG(j, + NOT_A_COORDINATE /* referencePointX */, true /* isGeometric */), + proximityInfo->getKeyCenterYOfKeyIdG(j, + NOT_A_COORDINATE /* referencePointY */, true /* isGeometric */)); + const float probability = inputCharProbability * probabilityDensity + / sumOfProbabilityDensities; + (*charProbabilities)[i][j] = probability; } } @@ -850,10 +806,9 @@ namespace latinime { for (int j = 0; j < keyCount; ++j) { hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].find(j); if (it == (*charProbabilities)[i].end()){ - (*sampledNearKeySets)[i].reset(j); + continue; } else if(it->second < ProximityInfoParams::MIN_PROBABILITY) { // Erases from near keys vector because it has very low probability. - (*sampledNearKeySets)[i].reset(j); (*charProbabilities)[i].erase(j); } else { it->second = -logf(it->second); @@ -865,9 +820,8 @@ namespace latinime { /* static */ void ProximityInfoStateUtils::updateSampledSearchKeySets( const ProximityInfo *const proximityInfo, const int sampledInputSize, - const int lastSavedInputSize, - const std::vector<int> *const sampledLengthCache, - const std::vector<NearKeycodesSet> *const sampledNearKeySets, + const int lastSavedInputSize, const std::vector<int> *const sampledLengthCache, + const std::vector<hash_map_compat<int, float> > *const charProbabilities, std::vector<NearKeycodesSet> *sampledSearchKeySets, std::vector<std::vector<int> > *sampledSearchKeyVectors) { sampledSearchKeySets->resize(sampledInputSize); @@ -884,7 +838,12 @@ namespace latinime { if ((*sampledLengthCache)[j] - (*sampledLengthCache)[i] >= readForwordLength) { break; } - (*sampledSearchKeySets)[i] |= (*sampledNearKeySets)[j]; + for(const auto& charProbability : charProbabilities->at(j)) { + if (charProbability.first == NOT_AN_INDEX) { + continue; + } + (*sampledSearchKeySets)[i].set(charProbability.first); + } } } const int keyCount = proximityInfo->getKeyCount(); diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h index ea0cd1149..7aa20c3d1 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h @@ -71,12 +71,12 @@ class ProximityInfoStateUtils { const std::vector<float> *const sampledSpeedRates, const std::vector<int> *const sampledLengthCache, const std::vector<float> *const sampledNormalizedSquaredLengthCache, - std::vector<NearKeycodesSet> *sampledNearKeySets, + const ProximityInfo *const proximityInfo, std::vector<hash_map_compat<int, float> > *charProbabilities); static void updateSampledSearchKeySets(const ProximityInfo *const proximityInfo, const int sampledInputSize, const int lastSavedInputSize, const std::vector<int> *const sampledLengthCache, - const std::vector<NearKeycodesSet> *const sampledNearKeySets, + const std::vector<hash_map_compat<int, float> > *const charProbabilities, std::vector<NearKeycodesSet> *sampledSearchKeySets, std::vector<std::vector<int> > *sampledSearchKeyVectors); static float getPointToKeyByIdLength(const float maxPointToKeyLength, @@ -86,7 +86,6 @@ class ProximityInfoStateUtils { const int sampledInputSize, const int lastSavedInputSize, const bool isGeometric, const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs, - std::vector<NearKeycodesSet> *sampledNearKeySets, std::vector<float> *sampledNormalizedSquaredLengthCache); static void initPrimaryInputWord(const int inputSize, const int *const inputProximities, int *primaryInputWord); diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h index 310bbdb62..c273ef456 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_utils.h +++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h @@ -125,28 +125,6 @@ class ProximityInfoUtils { return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR; } - // Normal distribution N(u, sigma^2). - struct NormalDistribution { - public: - NormalDistribution(const float u, const float sigma) - : mU(u), - mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F - * GeometryUtils::SQUARE_FLOAT(sigma))), - mPreComputedExponentPart(-1.0f / (2.0f * GeometryUtils::SQUARE_FLOAT(sigma))) {} - - float getProbabilityDensity(const float x) const { - const float shiftedX = x - mU; - return mPreComputedNonExpPart - * expf(mPreComputedExponentPart * GeometryUtils::SQUARE_FLOAT(shiftedX)); - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution); - const float mU; // mean value - const float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2) - const float mPreComputedExponentPart; // = -1 / (2 * sigma^2) - }; // struct NormalDistribution - private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoUtils); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b6dc7d006..ae2e7a8fe 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -20,7 +20,7 @@ #include <memory> #include "defines.h" -#include "suggest/core/dictionary/word_property.h" +#include "suggest/core/dictionary/property/word_property.h" namespace latinime { diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h index d3b8da0cc..8ddaa0514 100644 --- a/native/jni/src/suggest/core/policy/traversal.h +++ b/native/jni/src/suggest/core/policy/traversal.h @@ -45,6 +45,7 @@ class Traversal { virtual float getMaxSpatialDistance() const = 0; virtual int getDefaultExpandDicNodeSize() const = 0; virtual int getMaxCacheSize(const int inputSize) const = 0; + virtual int getTerminalCacheSize() const = 0; virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0; virtual bool isGoodToTraverseNextWord(const DicNode *const dicNode) const = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 9e5d902dd..843ca85a0 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -17,7 +17,6 @@ #ifndef LATINIME_DIC_TRAVERSE_SESSION_H #define LATINIME_DIC_TRAVERSE_SESSION_H -#include <stdint.h> #include <vector> #include "defines.h" diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 433820a42..e675e0bb3 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -88,7 +88,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const { } else { // Restart recognition at the root. traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()), - MAX_RESULTS); + TRAVERSAL->getTerminalCacheSize()); // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h index 6ff95cac4..a898e2afc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h @@ -17,7 +17,7 @@ #ifndef LATINIME_BIGRAM_LIST_POLICY_H #define LATINIME_BIGRAM_LIST_POLICY_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index 7e1038300..15f924a6a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -17,8 +17,8 @@ #ifndef LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H #define LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H +#include <cstdint> #include <cstdlib> -#include <stdint.h> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 66824245e..f950cade3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,7 +17,7 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 4185a2e7c..a6b4c4e14 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_HEADER_READ_WRITE_UTILS_H #define LATINIME_HEADER_READ_WRITE_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h index d73f73953..6d2b4778c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h @@ -17,7 +17,7 @@ #ifndef LATINIME_SHORTCUT_LIST_POLICY_H #define LATINIME_SHORTCUT_LIST_POLICY_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h index a83ed5a50..d065bf7fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_SHORTCUT_LIST_READING_UTILS_H #define LATINIME_SHORTCUT_LIST_READING_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h index f2fa5b75b..fe984615c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h @@ -17,8 +17,6 @@ #ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H #define LATINIME_VER4_SHORTCUT_LIST_POLICY_H -#include <stdint.h> - #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 79bcf6fa4..5f19534e2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -17,7 +17,6 @@ #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include <climits> -#include <stdint.h> #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h index 9454ddf33..6053b7ed3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h @@ -17,8 +17,6 @@ #ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H #define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H -#include <stdint.h> - #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h index 89ae12c0b..b13a075d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_DYNAMIC_PT_READING_UTILS_H #define LATINIME_DYNAMIC_PT_READING_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h index 71f473096..f703baf85 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h @@ -17,8 +17,6 @@ #ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H #define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H -#include <stdint.h> - #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "utils/hash_map_compat.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp index ebbdc2ea2..664aeebbb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp @@ -17,8 +17,8 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include <cstddef> +#include <cstdint> #include <cstdlib> -#include <stdint.h> #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 4e795f82c..b426dbf28 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -339,7 +339,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin std::vector<int> codePointVector(ptNodeParams.getCodePoints(), ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); // Fetch bigram information. - std::vector<WordProperty::BigramProperty> bigrams; + std::vector<BigramProperty> bigrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); @@ -360,7 +360,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin } } // Fetch shortcut information. - std::vector<WordProperty::ShortcutProperty> shortcuts; + std::vector<UnigramProperty::ShortcutProperty> shortcuts; int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); if (shortcutPos != NOT_A_DICT_POS) { int shortcutTargetCodePoints[MAX_WORD_LENGTH]; @@ -379,11 +379,10 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin shortcuts.emplace_back(&shortcutTarget, shortcutProbability); } } - return WordProperty(&codePointVector, ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), - ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), - NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, - &bigrams, &shortcuts); + const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), + ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); + return WordProperty(&codePointVector, &unigramProperty, &bigrams); } int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 11a40de55..79500b9fa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -17,7 +17,7 @@ #ifndef LATINIME_PATRICIA_TRIE_POLICY_H #define LATINIME_PATRICIA_TRIE_POLICY_H -#include <stdint.h> +#include <cstdint> #include <vector> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h index fa1430ce6..a6090a513 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H #define LATINIME_PATRICIA_TRIE_READING_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h index dd1a0da51..86fc89c5e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h @@ -17,7 +17,7 @@ #ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H #define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h index 77404adf8..548272148 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h @@ -17,7 +17,7 @@ #ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H #define LATINIME_VER2_PT_NODE_ARRAY_READER_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h index 1db9ea026..f24307e7b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h @@ -17,8 +17,6 @@ #ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H #define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H -#include <stdint.h> - #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index f01b3af0e..b2b0504a1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -17,8 +17,6 @@ #ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H #define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H -#include <stdint.h> - #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 107ddab2c..6cf8409dc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -20,7 +20,9 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/word_property.h" +#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/unigram_property.h" +#include "suggest/core/dictionary/property/word_property.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -358,7 +360,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code ptNodeParams.getTerminalId()); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. - std::vector<WordProperty::BigramProperty> bigrams; + std::vector<BigramProperty> bigrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); if (bigramListPos != NOT_A_DICT_POS) { int bigramWord1CodePoints[MAX_WORD_LENGTH]; @@ -395,7 +397,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code } } // Fetch shortcut information. - std::vector<WordProperty::ShortcutProperty> shortcuts; + std::vector<UnigramProperty::ShortcutProperty> shortcuts; int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); if (shortcutPos != NOT_A_DICT_POS) { int shortcutTarget[MAX_WORD_LENGTH]; @@ -411,11 +413,11 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code shortcuts.emplace_back(&target, shortcutProbability); } } - return WordProperty(&codePointVector, ptNodeParams.isNotAWord(), - ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), - ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), + const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), + ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(), - historicalInfo->getCount(), &bigrams, &shortcuts); + historicalInfo->getCount(), &shortcuts); + return WordProperty(&codePointVector, &unigramProperty, &bigrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h index e418c4933..466ff55d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H #define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 76be16518..23cbe3aa3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -18,7 +18,7 @@ #define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H #include <cstddef> -#include <stdint.h> +#include <cstdint> #include <vector> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index ebdd523e1..c0a9fcb1d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_BYTE_ARRAY_UTILS_H #define LATINIME_BYTE_ARRAY_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 7c6a21dd7..759b1c9b2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -17,7 +17,7 @@ #ifndef LATINIME_FORMAT_UTILS_H #define LATINIME_FORMAT_UTILS_H -#include <stdint.h> +#include <cstdint> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index f73716c8e..8460087ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -17,8 +17,8 @@ #ifndef LATINIME_MMAPPED_BUFFER_H #define LATINIME_MMAPPED_BUFFER_H +#include <cstdint> #include <memory> -#include <stdint.h> #include "defines.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h index 14fdf53cb..3b339e61a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/probability_utils.h @@ -17,8 +17,6 @@ #ifndef LATINIME_PROBABILITY_UTILS_H #define LATINIME_PROBABILITY_UTILS_H -#include <stdint.h> - #include "defines.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h index 21c167506..fca8120f1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h @@ -17,7 +17,7 @@ #ifndef LATINIME_SPARSE_TABLE_H #define LATINIME_SPARSE_TABLE_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 7c86b7dc9..cb3dfac70 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -17,7 +17,7 @@ #ifndef LATINIME_TYPING_TRAVERSAL_H #define LATINIME_TYPING_TRAVERSAL_H -#include <stdint.h> +#include <cstdint> #include "defines.h" #include "suggest/core/dicnode/dic_node.h" @@ -146,6 +146,10 @@ class TypingTraversal : public Traversal { : ScoringParams::MAX_CACHE_DIC_NODE_SIZE; } + AK_FORCE_INLINE int getTerminalCacheSize() const { + return MAX_RESULTS; + } + AK_FORCE_INLINE bool isPossibleOmissionChildNode( const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const { |