3 files changed, 55 insertions, 27 deletions
diff --git a/native/src/defines.h b/native/src/defines.h
index 71aaf28ae..c1eaf0df2 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -129,11 +129,16 @@ static void prof_out(void) {
 #define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
 #define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
 
+// Each "rate" below is a percentage: a value is multiplied by it and then divided by 100.
 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
+#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
+
+// Unlike the rates above, this is a plain multiplier: it is not divided by 100.
+#define FULL_MATCH_ACCENTS_OR_CAPITALIZATION_DIFFER_MULTIPLIER 2
 
 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
 // This is only used for the size of array. Not to be used in c functions.
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 3f9bcd758..dfbe8228e 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
     }
 }
 
-inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
-        const int skipPos, const int excessivePos, const int transposedPos, const int freq,
-        const bool sameLength) {
+inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
+        const int snr, const int skipPos, const int excessivePos, const int transposedPos,
+        const int freq, const bool sameLength) {
     // TODO: Demote by edit distance
     int finalFreq = freq * snr;
     if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
@@ -361,6 +361,17 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
             multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
         }
     }
+    int lengthFreq = TYPED_LETTER_MULTIPLIER;
+    for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
+    if (lengthFreq == snr) {
+        if (depth > 1) {
+            if (DEBUG_DICT) LOGI("Found full matched word.");
+            multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
+        }
+        if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
+            finalFreq *= FULL_MATCH_ACCENTS_OR_CAPITALIZATION_DIFFER_MULTIPLIER;
+        }
+    }
     if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
     return finalFreq;
 }
@@ -369,8 +380,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
         unsigned short *word, const int inputIndex, const int depth, const int snr,
         int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
         const int transposedPos, const int freq) {
-    const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
-            freq, false);
+    const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
+            transposedPos, freq, false);
     if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
     if (depth >= mInputLength && skipPos < 0) {
         registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
@@ -379,10 +390,9 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
 
 inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
         unsigned short *word, const int inputIndex, const int depth, const int snr,
-        const int skipPos, const int excessivePos, const int transposedPos, const int freq,
-        const int addedWeight) {
+        const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
     if (sameAsTyped(word, depth + 1)) return;
-    const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
+    const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos,
             excessivePos, transposedPos, freq, true);
     // Proximity collection will promote a word of the same length as what user typed.
     if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
@@ -418,9 +428,9 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
     return false;
 }
 
-inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
-        const unsigned short c, const int skipPos, const int excessivePos,
-        const int transposedPos) {
+inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(
+        const int *currentChars, const unsigned short c, const int skipPos,
+        const int excessivePos, const int transposedPos) {
     const unsigned short lowerC = toLowerCase(c);
     int j = 0;
     while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
@@ -428,18 +438,19 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
         // If skipPos is defined, not to search proximity collections.
         // First char is what user  typed.
         if (matched) {
-            return j;
+            if (j > 0) return NEAR_PROXIMITY_CHAR;
+            return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
         } else if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
             // Not to check proximity characters
-            return -1;
+            return UNRELATED_CHAR;
         }
         ++j;
     }
-    return -1;
+    return UNRELATED_CHAR;
 }
 
 inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
+        const int maxDepth, const bool traverseAllNodes, int snr, int inputIndex,
         const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
         int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
         bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
@@ -455,8 +466,9 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
     int childPosition;
     bool terminal;
     int freq;
+    bool isSameAsUserTypedLength = false;
 
-    if (excessivePos == depth) ++inputIndex;
+    if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
 
     *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
             &childPosition, &terminal, &freq);
@@ -485,21 +497,24 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
 
         int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,
                 transposedPos);
-        if (matchedProximityCharId < 0) return false;
+        if (UNRELATED_CHAR == matchedProximityCharId) return false;
         mWord[depth] = c;
         // If inputIndex is greater than mInputLength, that means there is no
         // proximity chars. So, we don't need to check proximity.
-        const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
-        const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
+        if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+            snr = snr * TYPED_LETTER_MULTIPLIER;
+        }
+        isSameAsUserTypedLength = mInputLength == inputIndex + 1  // assigns the outer flag
+                || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
         if (isSameAsUserTypedLength && terminal) {
             onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
-                    skipPos, excessivePos, transposedPos, freq, addedWeight);
+                    skipPos, excessivePos, transposedPos, freq);
         }
         if (!needsToTraverseChildrenNodes) return false;
         // Start traversing all nodes after the index exceeds the user typed length
         *newTraverseAllNodes = isSameAsUserTypedLength;
-        *newSnr = snr * addedWeight;
-        *newDiffs = diffs + ((matchedProximityCharId > 0) ? 1 : 0);
+        *newSnr = snr;
+        *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
         *newInputIndex = inputIndex + 1;
     }
     // Optimization: Prune out words that are too long compared to how much was typed.
@@ -508,7 +523,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
     }
 
     // If inputIndex is greater than mInputLength, that means there are no proximity chars.
-    if (mInputLength <= *newInputIndex) {
+    // TODO: Check if this can be isSameAsUserTypedLength only.
+    if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {
         *newTraverseAllNodes = true;
     }
     // get the count of nodes and increment childAddress.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 7f7b7bd21..90c98149b 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -22,6 +22,13 @@
 namespace latinime {
 
 class UnigramDictionary {
+
+    typedef enum {                             // Return value of getMatchedProximityId()
+        SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,  // Same char, possibly with different case or accent
+        NEAR_PROXIMITY_CHAR,                   // A char located nearby on the keyboard
+        UNRELATED_CHAR                         // None of the above: an unrelated char
+    } ProximityType;
+
 public:
     UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
             int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
@@ -52,7 +59,7 @@ private:
             const int excessivePos, const int transposedPos, int *nextLetters,
             const int nextLettersSize);
     void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
-    int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
+    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
             const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
     void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
             const int inputIndex, const int depth, const int snr, int *nextLetters,
@@ -60,11 +67,11 @@ private:
             const int transposedPos, const int freq);
     void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
             const int inputIndex, const int depth, const int snr, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq, const int addedWeight);
+            const int excessivePos, const int transposedPos, const int freq);
     bool needsToSkipCurrentNode(const unsigned short c,
             const int inputIndex, const int skipPos, const int depth);
-    int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,
-            const int excessivePos, const int transposedPos);
+    ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
+            const int skipPos, const int excessivePos, const int transposedPos);
     // Process a node by considering proximity, missing and excessive character
     bool processCurrentNode(const int pos, const int depth,
             const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,