diff options
-rw-r--r-- | native/jni/Android.mk | 2 | ||||
-rw-r--r-- | native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h | 16 | ||||
-rw-r--r-- | native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp) | 31 | ||||
-rw-r--r-- | native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h) | 66 | ||||
-rw-r--r-- | native/jni/src/suggest/core/suggest.cpp | 23 | ||||
-rw-r--r-- | native/jni/src/suggest/core/suggest.h | 2 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/typing/typing_weighting.h | 3 |
7 files changed, 84 insertions, 59 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index fb60139d3..d5df6b62e 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -53,10 +53,10 @@ LATIN_IME_CORE_SRC_FILES := \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ bigram_dictionary.cpp \ - binary_dictionary_bigrams_reading_utils.cpp \ binary_dictionary_format_utils.cpp \ binary_dictionary_header.cpp \ binary_dictionary_header_reading_utils.cpp \ + binary_dictionary_terminal_attributes_reading_utils.cpp \ bloom_filter.cpp \ byte_array_utils.cpp \ dictionary.cpp \ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h index 0856840b2..f2b48e960 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h @@ -18,8 +18,8 @@ #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h" #include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" namespace latinime { @@ -35,15 +35,17 @@ class BinaryDictionaryBigramsIterator { } AK_FORCE_INLINE void next() { - mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer( + mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( mBinaryDictionaryInfo, &mPos); - mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer( - mBinaryDictionaryInfo, mBigramFlags, &mPos); - mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags); + mBigramPos = + BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer( + mBinaryDictionaryInfo, mBigramFlags, &mPos); + mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags); } AK_FORCE_INLINE int getProbability() const { - return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags); + return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags( + mBigramFlags); } AK_FORCE_INLINE int getBigramPos() const { @@ -59,7 +61,7 @@ class BinaryDictionaryBigramsIterator { const BinaryDictionaryInfo *const mBinaryDictionaryInfo; int mPos; - BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags; + BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags; int mBigramPos; bool mHasNext; }; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp index 78a54b141..0a7509c8b 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp @@ -14,33 +14,28 @@ * limitations under the License. */ -#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h" +#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; +typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils; + +const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; +const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; +const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; +const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; +const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; // Flag for presence of more attributes -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; +const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80; // Mask for attribute probability, stored on 4 bits inside the flags byte. -const BinaryDictionaryBigramsReadingUtils::BigramFlags - BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; -const int BinaryDictionaryBigramsReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; +const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; +const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; -/* static */ int BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, +/* static */ int TaUtils::getBigramAddressAndForwardPointer( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, int *const pos) { int offset = 0; const int origin = *pos; diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h index e71f2a17a..f38fd5aaa 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H -#define LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H +#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H +#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H #include <stdint.h> @@ -25,55 +25,57 @@ namespace latinime { -class BinaryDictionaryBigramsReadingUtils { +class BinaryDictionaryTerminalAttributesReadingUtils { public: - typedef uint8_t BigramFlags; + typedef uint8_t TerminalAttributeFlags; + typedef TerminalAttributeFlags BigramFlags; - static AK_FORCE_INLINE void skipExistingBigrams( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); - while (hasNext(flags)) { - *pos += attributeAddressSize(flags); - flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); - } - *pos += attributeAddressSize(flags); - } - - static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer( + static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { return ByteArrayUtils::readUint8andAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); } - static AK_FORCE_INLINE int getBigramProbability(const BigramFlags flags) { + static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { return flags & MASK_ATTRIBUTE_PROBABILITY; } - static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) { - return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; + static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) { + return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; } - static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) { - return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0; + // Bigrams reading methods + static AK_FORCE_INLINE void skipExistingBigrams( + const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { + BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); + while (hasNext(flags)) { + *pos += attributeAddressSize(flags); + flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); + } + *pos += attributeAddressSize(flags); } static int getBigramAddressAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const BigramFlags flags, int *const pos); + const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, + int *const pos); private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryBigramsReadingUtils); + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils); - static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE; - static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; - static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; - static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; - static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; - static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT; - static const BigramFlags MASK_ATTRIBUTE_PROBABILITY; + static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE; + static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; + static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; + static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; + static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; + static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT; + static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY; static const int ATTRIBUTE_ADDRESS_SHIFT; - static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) { + static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) { + return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; + } + + static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) { return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; /* Note: this is a value-dependant optimization of what may probably be more readably written this way: @@ -87,4 +89,4 @@ class BinaryDictionaryBigramsReadingUtils { } }; } -#endif /* LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H */ +#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index a8f16c8cb..173a612be 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -36,6 +36,7 @@ namespace latinime { const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; +const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1; /** * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates @@ -148,6 +149,8 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen &doubleLetterTerminalIndex, &doubleLetterLevel); int maxScore = S_INT_MIN; + int bestExactMatchedNodeTerminalIndex = -1; + int bestExactMatchedNodeOutputWordIndex = -1; // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; ++terminalIndex) { @@ -186,7 +189,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen const int finalScore = SCORING->calculateFinalScore( compoundDistance, traverseSession->getInputSize(), isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord())); - maxScore = max(maxScore, finalScore); if (TRAVERSAL->allowPartialCommit()) { @@ -200,6 +202,25 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen if (isValidWord) { outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; frequencies[outputWordIndex] = finalScore; + if (isSafeExactMatch) { + // Demote exact matches that are not the highest probable node among all exact + // matches. + const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0 + || terminals[bestExactMatchedNodeTerminalIndex].getProbability() + < terminalDicNode->getProbability(); + const int outputWordIndexToBeDemoted = isBestTerminal ? + bestExactMatchedNodeOutputWordIndex : outputWordIndex; + if (outputWordIndexToBeDemoted >= 0) { + frequencies[outputWordIndexToBeDemoted] -= + FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; + } + if (isBestTerminal) { + // Updates the best exact matched node index. + bestExactMatchedNodeTerminalIndex = terminalIndex; + // Updates the best exact matched output word index. + bestExactMatchedNodeOutputWordIndex = outputWordIndex; + } + } // Populate the outputChars array with the suggested word. const int startIndex = outputWordIndex * MAX_WORD_LENGTH; terminalDicNode->outputResult(&outputCodePoints[startIndex]); diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 875cbe4e0..752bde9ac 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -82,6 +82,8 @@ class Suggest : public SuggestInterface { // Threshold for autocorrection classifier static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; + // Final score penalty to exact match words that are not the most probable exact match. + static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; const Traversal *const TRAVERSAL; const Scoring *const SCORING; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 7333dbe0d..e098f353e 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -169,6 +169,9 @@ class TypingWeighting : public Weighting { float getTerminalLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, const float dicNodeLanguageImprobability) const { + // We promote exact matches here to prevent them from being pruned. The final score of + // exact match nodes might be demoted later in Suggest::outputSuggestions if there are + // multiple exact matches. const float languageImprobability = (dicNode->isExactMatch()) ? 0.0f : dicNodeLanguageImprobability; return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; |