aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/binary_format.h 4
-rw-r--r-- native/jni/src/terminal_attributes.h 4
-rw-r--r-- native/jni/src/unigram_dictionary.cpp 10
3 files changed, 17 insertions, 1 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index d8f3e83dd..25d504bfb 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -43,6 +43,10 @@ class BinaryFormat {
static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
// Flag for bigram presence
static const int FLAG_HAS_BIGRAMS = 0x04;
+ // Flag for non-words (typically, shortcut only entries)
+ static const int FLAG_IS_NOT_A_WORD = 0x02;
+ // Flag for blacklist
+ static const int FLAG_IS_BLACKLISTED = 0x01;
// Attribute (bigram/shortcut) related flags:
// Flag for presence of more attributes
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index 34ab8f0ef..9ff2772b1 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -72,6 +72,10 @@ class TerminalAttributes {
return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
}
+ bool isBlacklistedOrNotAWord() const { // True when mFlags has FLAG_IS_BLACKLISTED and/or FLAG_IS_NOT_A_WORD set.
+ return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD); // Any non-zero masked bit converts to true.
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
const uint8_t *const mDict;
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index ba3c2db6b..d4c51df63 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -391,9 +391,11 @@ inline void UnigramDictionary::onTerminal(const int probability,
const int finalProbability =
correction->getFinalProbability(probability, &wordPointer, &wordLength);
- if (0 != finalProbability) {
+ if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
// If the probability is 0, we don't want to add this word. However we still
// want to add its shortcuts (including a possible whitelist entry) if any.
+ // Furthermore, if this is not a word (shortcut only for example) or a blacklisted
+ // entry then we never want to suggest this.
addWord(wordPointer, wordLength, finalProbability, masterQueue,
Dictionary::KIND_CORRECTION);
}
@@ -841,6 +843,12 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
return NOT_A_PROBABILITY;
}
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+ // If this is not a word, or if it's a blacklisted entry, it should behave as
+ // having no frequency outside of the suggestion process (where it should be used
+ // for shortcuts).
+ return NOT_A_PROBABILITY;
+ }
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
if (hasMultipleChars) {
pos = BinaryFormat::skipOtherCharacters(root, pos);