aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/binary_format.h7
-rw-r--r--native/jni/src/correction.cpp4
-rw-r--r--native/jni/src/correction.h1
-rw-r--r--native/jni/src/proximity_info_state.h30
-rw-r--r--native/jni/src/terminal_attributes.h3
-rw-r--r--native/jni/src/unigram_dictionary.cpp25
6 files changed, 49 insertions, 21 deletions
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 2ee4077c1..4cabc8404 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -52,6 +52,8 @@ class BinaryFormat {
// Mask for attribute frequency, stored on 4 bits inside the flags byte.
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
+ // The numeric value of the shortcut frequency that means 'whitelist'.
+ static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
@@ -99,6 +101,7 @@ class BinaryFormat {
static bool hasChildrenInFlags(const uint8_t flags);
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
+ static int getAttributeFrequencyFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@@ -340,6 +343,10 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *con
}
}
+inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
+ return flags & MASK_ATTRIBUTE_FREQUENCY;
+}
+
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index b18b35e8c..c815dabe6 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -157,6 +157,10 @@ void Correction::checkState() {
}
}
+bool Correction::sameAsTyped() {
+ return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
+}
+
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index 81623a46b..57e7b7189 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -105,6 +105,7 @@ class Correction {
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
const bool doAutoCompletion, const int maxErrors);
void checkState();
+ bool sameAsTyped();
bool initProcessState(const int index);
int getInputIndex();
diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h
index 76d45516e..474c40757 100644
--- a/native/jni/src/proximity_info_state.h
+++ b/native/jni/src/proximity_info_state.h
@@ -160,6 +160,21 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled;
}
+ inline bool sameAsTyped(const unsigned short *word, int length) const {
+ if (length != mInputLength) {
+ return false;
+ }
+ const int *inputCodes = mInputCodes;
+ while (length--) {
+ if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
+ return false;
+ }
+ inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
+ word++;
+ }
+ return true;
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
/////////////////////////////////////////
@@ -179,21 +194,6 @@ class ProximityInfoState {
return mInputXCoordinates && mInputYCoordinates;
}
- bool sameAsTyped(const unsigned short *word, int length) const {
- if (length != mInputLength) {
- return false;
- }
- const int *inputCodes = mInputCodes;
- while (length--) {
- if ((unsigned int) *inputCodes != (unsigned int) *word) {
- return false;
- }
- inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
- word++;
- }
- return true;
- }
-
// const
const ProximityInfo *mProximityInfo;
bool mHasTouchPositionCorrectionData;
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index d63364514..1ae9c7cbb 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -46,7 +46,7 @@ class TerminalAttributes {
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
// TODO: make the output an uint32_t* to handle the whole unicode range.
- inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) {
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
mHasNextShortcutTarget =
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
@@ -56,6 +56,7 @@ class TerminalAttributes {
if (NOT_A_CHARACTER == charCode) break;
outWord[i] = (uint16_t)charCode;
}
+ *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
return i;
}
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 9f7ab5362..cc6d39a29 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -391,8 +391,12 @@ inline void UnigramDictionary::onTerminal(const int probability,
const int finalProbability =
correction->getFinalProbability(probability, &wordPointer, &wordLength);
if (finalProbability != NOT_A_PROBABILITY) {
- addWord(wordPointer, wordLength, finalProbability, masterQueue,
- Dictionary::KIND_CORRECTION);
+ if (0 != finalProbability) {
+ // If the probability is 0, we don't want to add this word. However we still
+ // want to add its shortcuts (including a possible whitelist entry) if any.
+ addWord(wordPointer, wordLength, finalProbability, masterQueue,
+ Dictionary::KIND_CORRECTION);
+ }
const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
// Please note that the shortcut candidates will be added to the master queue only.
@@ -407,10 +411,21 @@ inline void UnigramDictionary::onTerminal(const int probability,
// with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word.
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
- MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
- addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability,
- masterQueue, Dictionary::KIND_CORRECTION);
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
+ int shortcutScore;
+ int kind;
+ if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
+ && correction->sameAsTyped()) {
+ shortcutScore = S_INT_MAX;
+ kind = Dictionary::KIND_WHITELIST;
+ } else {
+ shortcutScore = shortcutProbability;
+ kind = Dictionary::KIND_CORRECTION;
+ }
+ addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
+ masterQueue, kind);
}
}
}