diff options
Diffstat (limited to 'native/jni/src')
-rw-r--r-- | native/jni/src/binary_format.h | 4 |
-rw-r--r-- | native/jni/src/terminal_attributes.h | 4 |
-rw-r--r-- | native/jni/src/unigram_dictionary.cpp | 10 |
3 files changed, 17 insertions, 1 deletion
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index d8f3e83dd..25d504bfb 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -43,6 +43,10 @@ class BinaryFormat { static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; + // Flag for non-words (typically, shortcut only entries) + static const int FLAG_IS_NOT_A_WORD = 0x02; + // Flag for blacklist + static const int FLAG_IS_BLACKLISTED = 0x01; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index 34ab8f0ef..9ff2772b1 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -72,6 +72,10 @@ class TerminalAttributes { return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); } + bool isBlacklistedOrNotAWord() const { + return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD); + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); const uint8_t *const mDict; diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index ba3c2db6b..d4c51df63 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -391,9 +391,11 @@ inline void UnigramDictionary::onTerminal(const int probability, const int finalProbability = correction->getFinalProbability(probability, &wordPointer, &wordLength); - if (0 != finalProbability) { + if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) { // If the probability is 0, we don't want to add this word. However we still // want to add its shortcuts (including a possible whitelist entry) if any. + // Furthermore, if this is not a word (shortcut only for example) or a blacklisted + // entry then we never want to suggest this. 
addWord(wordPointer, wordLength, finalProbability, masterQueue, Dictionary::KIND_CORRECTION); } @@ -841,6 +843,12 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt return NOT_A_PROBABILITY; } const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { + // If this is not a word, or if it's a blacklisted entry, it should behave as + // having no frequency outside of the suggestion process (where it should be used + // for shortcuts). + return NOT_A_PROBABILITY; + } const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); if (hasMultipleChars) { pos = BinaryFormat::skipOtherCharacters(root, pos); |