1 files changed, 255 insertions, 83 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index dfbe8228e..30fbaeae1 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -29,20 +29,144 @@
 
 namespace latinime {
 
+const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =
+        { { 'a', 'e' },
+        { 'o', 'e' },
+        { 'u', 'e' } };
+
 UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterMultiplier,
         int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
         const bool isLatestDictVersion)
-    : DICT(dict), MAX_WORD_LENGTH(maxWordLength),MAX_WORDS(maxWords),
+    : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
     MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
     TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
-    ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0) {
+    ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),
+    BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),
+    MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
     if (DEBUG_DICT) LOGI("UnigramDictionary - constructor");
 }
 
 UnigramDictionary::~UnigramDictionary() {}
 
-int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords,
-        int *frequencies, int *nextLetters, int nextLettersSize) {
+static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
+        const int MAX_PROXIMITY_CHARS) {
+    return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize;
+}
+
+bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {
+
+    // There can't be a digraph if we don't have at least 2 characters to examine
+    if (i + 2 > codesSize) return false;
+
+    // Search for the first char of some digraph
+    int lastDigraphIndex = -1;
+    const int thisChar = codes[i * MAX_PROXIMITY_CHARS];
+    for (lastDigraphIndex = sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]) - 1;
+            lastDigraphIndex >= 0; --lastDigraphIndex) {
+        if (thisChar == GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].first) break;
+    }
+    // No match: return early
+    if (lastDigraphIndex < 0) return false;
+
+    // It's an interesting digraph if the second char matches too.
+    return GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].second == codes[(i + 1) * MAX_PROXIMITY_CHARS];
+}
+
+// Mostly the same arguments as the non-recursive version, except:
+// codes is the original value. It points to the start of the work buffer, and gets passed as is.
+// codesSize is the size of the user input (thus, it is the size of codesSrc).
+// codesDest is the current point in the work buffer.
+// codesSrc is the current point in the user-input, original, content-unmodified buffer.
+// codesRemain is the remaining size in codesSrc.
+void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
+        const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
+        const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
+        const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
+
+    if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
+        for (int i = 0; i < codesRemain; ++i) {
+            if (isDigraph(codesSrc, i, codesRemain)) {
+                // Found a digraph. We will try both spellings. eg. the word is "pruefen"
+
+                // Copy the word up to the first char of the digraph, then continue processing
+                // on the remaining part of the word, skipping the second char of the digraph.
+                // In our example, copy "pru" and continue running on "fen"
+                // Make i the index of the second char of the digraph for simplicity. Forgetting
+                // to do that results in an infinite recursion so take care!
+                ++i;
+                memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR);
+                getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
+                        codesBuffer, codesBufferSize, flags,
+                        codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
+                        currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,
+                        frequencies);
+
+                // Copy the second char of the digraph in place, then continue processing on
+                // the remaining part of the word.
+                // In our example, after "pru" in the buffer copy the "e", and continue on "fen"
+                memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,
+                        BYTES_IN_ONE_CHAR);
+                getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
+                        codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,
+                        codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,
+                        outWords, frequencies);
+                return;
+            }
+        }
+    }
+
+    // If we come here, we hit the end of the word: let's check it against the dictionary.
+    // In our example, we'll come here once for "prufen" and then once for "pruefen".
+    // If the word contains several digraphs, we'll come it for the product of them.
+    // eg. if the word is "ueberpruefen" we'll test, in order, against
+    // "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".
+    const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain;
+    if (0 != remainingBytes)
+        memcpy(codesDest, codesSrc, remainingBytes);
+
+    getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
+            (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
+}
+
+int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+        const int *ycoordinates, const int *codes, const int codesSize, const int flags,
+        unsigned short *outWords, int *frequencies) {
+
+    if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)
+    { // Incrementally tune the word and try all possibilities
+        int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
+        getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
+                codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);
+    } else { // Normal processing
+        getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
+                outWords, frequencies);
+    }
+
+    PROF_START(20);
+    // Get the word count
+    int suggestedWordsCount = 0;
+    while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
+        suggestedWordsCount++;
+    }
+
+    if (DEBUG_DICT) {
+        LOGI("Returning %d words", suggestedWordsCount);
+        LOGI("Next letters: ");
+        for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
+            if (mNextLettersFrequency[k] > 0) {
+                LOGI("%c = %d,", k, mNextLettersFrequency[k]);
+            }
+        }
+    }
+    PROF_END(20);
+    PROF_CLOSE;
+    return suggestedWordsCount;
+}
+
+void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
+        const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
+        unsigned short *outWords, int *frequencies) {
+
     PROF_OPEN;
     PROF_START(0);
     initSuggestions(codes, codesSize, outWords, frequencies);
@@ -52,7 +176,7 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
     PROF_END(0);
 
     PROF_START(1);
-    getSuggestionCandidates(-1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
+    getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH);
     PROF_END(1);
 
     PROF_START(2);
@@ -99,28 +223,25 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
     PROF_END(5);
 
     PROF_START(6);
-    // Get the word count
-    int suggestedWordsCount = 0;
-    while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
-        suggestedWordsCount++;
-    }
-
-    if (DEBUG_DICT) {
-        LOGI("Returning %d words", suggestedWordsCount);
-        LOGI("Next letters: ");
-        for (int k = 0; k < nextLettersSize; k++) {
-            if (nextLetters[k] > 0) {
-                LOGI("%c = %d,", k, nextLetters[k]);
+    if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY) {
+        // The first and last "mistyped spaces" are taken care of by excessive character handling
+        for (int i = 1; i < codesSize - 1; ++i) {
+            if (DEBUG_DICT) LOGI("--- Suggest words with proximity space %d", i);
+            const int x = xcoordinates[i];
+            const int y = ycoordinates[i];
+            if (DEBUG_PROXIMITY_INFO)
+                LOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
+                        i, x, y, proximityInfo->hasSpaceProximity(x, y));
+            if (proximityInfo->hasSpaceProximity(x, y)) {
+                getMistypedSpaceWords(mInputLength, i);
             }
         }
     }
     PROF_END(6);
-    PROF_CLOSE;
-    return suggestedWordsCount;
 }
 
-void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned short *outWords,
-        int *frequencies) {
+void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
+        unsigned short *outWords, int *frequencies) {
     if (DEBUG_DICT) LOGI("initSuggest");
     mFrequencies = frequencies;
     mOutputChars = outWords;
@@ -182,7 +303,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
     return false;
 }
 
-unsigned short UnigramDictionary::toLowerCase(unsigned short c) {
+unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) {
     if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
         c = BASE_CHARS[c];
     }
@@ -198,7 +319,7 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
     if (length != mInputLength) {
         return false;
     }
-    int *inputCodes = mInputCodes;
+    const int *inputCodes = mInputCodes;
     while (length--) {
         if ((unsigned int) *inputCodes != (unsigned int) *word) {
             return false;
@@ -238,7 +359,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
         if (mStackChildCount[depth] > 0) {
             --mStackChildCount[depth];
             bool traverseAllNodes = mStackTraverseAll[depth];
-            int snr = mStackNodeFreq[depth];
+            int matchWeight = mStackNodeFreq[depth];
             int inputIndex = mStackInputIndex[depth];
             int diffs = mStackDiffs[depth];
             int siblingPos = mStackSiblingPos[depth];
@@ -246,9 +367,10 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
             // depth will never be greater than maxDepth because in that case,
             // needsToTraverseChildrenNodes should be false
             const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
-                    maxDepth, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos,
-                    transposedPos, nextLetters, nextLettersSize, &childCount, &firstChildPos,
-                    &traverseAllNodes, &snr, &inputIndex, &diffs, &siblingPos);
+                    maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
+                    excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
+                    &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
+                    &siblingPos);
             // Update next sibling pos
             mStackSiblingPos[depth] = siblingPos;
             if (needsToTraverseChildrenNodes) {
@@ -256,7 +378,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
                 ++depth;
                 mStackChildCount[depth] = childCount;
                 mStackTraverseAll[depth] = traverseAllNodes;
-                mStackNodeFreq[depth] = snr;
+                mStackNodeFreq[depth] = matchWeight;
                 mStackInputIndex[depth] = inputIndex;
                 mStackDiffs[depth] = diffs;
                 mStackSiblingPos[depth] = firstChildPos;
@@ -276,27 +398,31 @@ inline static void multiplyRate(const int rate, int *freq) {
     }
 }
 
-bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
-    if (missingSpacePos <= 0 || missingSpacePos >= inputLength
-            || inputLength >= MAX_WORD_LENGTH) return false;
-    const int newWordLength = inputLength + 1;
+bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
+        const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
+        const int secondWordLength) {
+    if (inputLength >= MAX_WORD_LENGTH) return false;
+    if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
+            || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
+        return false;
+    const int newWordLength = firstWordLength + secondWordLength + 1;
     // Allocating variable length array on stack
     unsigned short word[newWordLength];
-    const int firstFreq = getBestWordFreq(0, missingSpacePos, mWord);
+    const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord);
     if (DEBUG_DICT) LOGI("First freq: %d", firstFreq);
     if (firstFreq <= 0) return false;
 
-    for (int i = 0; i < missingSpacePos; ++i) {
+    for (int i = 0; i < firstWordLength; ++i) {
         word[i] = mWord[i];
     }
 
-    const int secondFreq = getBestWordFreq(missingSpacePos, inputLength - missingSpacePos, mWord);
+    const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord);
     if (DEBUG_DICT) LOGI("Second  freq:  %d", secondFreq);
     if (secondFreq <= 0) return false;
 
-    word[missingSpacePos] = SPACE;
-    for (int i = (missingSpacePos + 1); i < newWordLength; ++i) {
-        word[i] = mWord[i - missingSpacePos - 1];
+    word[firstWordLength] = SPACE;
+    for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
+        word[i] = mWord[i - firstWordLength - 1];
     }
 
     int pairFreq = ((firstFreq + secondFreq) / 2);
@@ -306,6 +432,17 @@ bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int mi
     return true;
 }
 
+bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
+    return getSplitTwoWordsSuggestion(
+            inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos);
+}
+
+bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) {
+    return getSplitTwoWordsSuggestion(
+            inputLength, 0, spaceProximityPos, spaceProximityPos + 1,
+            inputLength - spaceProximityPos - 1);
+}
+
 // Keep this for comparing spec to new getWords
 void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
         const int excessivePos, const int transposedPos,int *nextLetters,
@@ -319,40 +456,52 @@ void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength,
 }
 
 void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
-        const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
-        int *nextLetters, const int nextLettersSize) {
+        const int maxDepth, const bool traverseAllNodes, const int matchWeight,
+        const int inputIndex, const int diffs, const int skipPos, const int excessivePos,
+        const int transposedPos, int *nextLetters, const int nextLettersSize) {
     int siblingPos = pos;
     for (int i = 0; i < childrenCount; ++i) {
         int newCount;
         int newChildPosition;
         const int newDepth = depth + 1;
         bool newTraverseAllNodes;
-        int newSnr;
+        int newMatchRate;
         int newInputIndex;
         int newDiffs;
         int newSiblingPos;
         const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
-                traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, transposedPos,
+                traverseAllNodes, matchWeight, inputIndex, diffs,
+                skipPos, excessivePos, transposedPos,
                 nextLetters, nextLettersSize,
-                &newCount, &newChildPosition, &newTraverseAllNodes, &newSnr,
+                &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
                 &newInputIndex, &newDiffs, &newSiblingPos);
         siblingPos = newSiblingPos;
 
         if (needsToTraverseChildrenNodes) {
             getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
-                    newSnr, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,
+                    newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,
                     nextLetters, nextLettersSize);
         }
     }
 }
 
+static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
+static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
+    return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
+}
 inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
-        const int snr, const int skipPos, const int excessivePos, const int transposedPos,
-        const int freq, const bool sameLength) {
+        const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
+        const int freq, const bool sameLength) const {
     // TODO: Demote by edit distance
-    int finalFreq = freq * snr;
-    if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
+    int finalFreq = freq * matchWeight;
+    if (skipPos >= 0) {
+        if (mInputLength >= 3) {
+            multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
+                    (mInputLength - 2) / (mInputLength - 1), &finalFreq);
+        } else {
+            finalFreq = 0;
+        }
+    }
     if (transposedPos >= 0) multiplyRate(
             WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
     if (excessivePos >= 0) {
@@ -363,24 +512,24 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
     }
     int lengthFreq = TYPED_LETTER_MULTIPLIER;
     for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
-    if (lengthFreq == snr) {
+    if (lengthFreq == matchWeight) {
         if (depth > 1) {
             if (DEBUG_DICT) LOGI("Found full matched word.");
             multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
         }
         if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
-            finalFreq *= FULL_MATCH_ACCENTS_OR_CAPITALIZATION_DIFFER_MULTIPLIER;
+            finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
         }
     }
-    if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
+    if (sameLength) finalFreq *= FULL_WORD_MULTIPLIER;
     return finalFreq;
 }
 
 inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
-        unsigned short *word, const int inputIndex, const int depth, const int snr,
+        unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
         int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
         const int transposedPos, const int freq) {
-    const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
+    const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, excessivePos,
             transposedPos, freq, false);
     if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
     if (depth >= mInputLength && skipPos < 0) {
@@ -389,10 +538,10 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
 }
 
 inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
-        unsigned short *word, const int inputIndex, const int depth, const int snr,
+        unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
         const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
     if (sameAsTyped(word, depth + 1)) return;
-    const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos,
+    const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
             excessivePos, transposedPos, freq, true);
     // Proximity collection will promote a word of the same length as what user typed.
     if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
@@ -400,18 +549,18 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
 
 inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
         const int inputIndex, const int skipPos, const int depth) {
-    const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS))[0];
+    const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
     // Skip the ' or other letter and continue deeper
     return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
 }
 
 inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
-        const int inputLength) {
+        const int inputLength) const {
     if (inputIndex < 0 || inputIndex >= inputLength) return false;
     const int currentChar = *getInputCharsAt(inputIndex);
     const int leftIndex = inputIndex - 1;
     if (leftIndex >= 0) {
-        int *leftChars = getInputCharsAt(leftIndex);
+        const int *leftChars = getInputCharsAt(leftIndex);
         int i = 0;
         while (leftChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {
             if (leftChars[i++] == currentChar) return true;
@@ -419,7 +568,7 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
     }
     const int rightIndex = inputIndex + 1;
     if (rightIndex < inputLength) {
-        int *rightChars = getInputCharsAt(rightIndex);
+        const int *rightChars = getInputCharsAt(rightIndex);
         int i = 0;
         while (rightChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {
             if (rightChars[i++] == currentChar) return true;
@@ -428,32 +577,54 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
     return false;
 }
 
+
+// In the following function, c is the current character of the dictionary word
+// currently examined.
+// currentChars is an array containing the keys close to the character the
+// user actually typed at the same position. We want to see if c is in it: if so,
+// then the word contains at that position a character close to what the user
+// typed.
+// What the user typed is actually the first character of the array.
+// Notice : accented characters do not have a proximity list, so they are alone
+// in their list. The non-accented version of the character should be considered
+// "close", but not the other keys close to the non-accented version.
 inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(
         const int *currentChars, const unsigned short c, const int skipPos,
         const int excessivePos, const int transposedPos) {
-    const unsigned short lowerC = toLowerCase(c);
-    int j = 0;
+    const unsigned short baseLowerC = toBaseLowerCase(c);
+
+    // The first char in the array is what user typed. If it matches right away,
+    // that means the user typed that same char for this pos.
+    if (currentChars[0] == baseLowerC || currentChars[0] == c)
+        return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
+
+    // If one of those is true, we should not check for close characters at all.
+    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
+        return UNRELATED_CHAR;
+
+    // If the non-accented, lowercased version of that first character matches c,
+    // then we have a non-accented version of the accented character the user
+    // typed. Treat it as a close char.
+    if (toBaseLowerCase(currentChars[0]) == baseLowerC)
+        return NEAR_PROXIMITY_CHAR;
+
+    // Not an exact nor an accent-alike match: search the list of close keys
+    int j = 1;
     while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
-        const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
-        // If skipPos is defined, not to search proximity collections.
-        // First char is what user  typed.
-        if (matched) {
-            if (j > 0) return NEAR_PROXIMITY_CHAR;
-            return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
-        } else if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
-            // Not to check proximity characters
-            return UNRELATED_CHAR;
-        }
+        const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
+        if (matched) return NEAR_PROXIMITY_CHAR;
         ++j;
     }
+
+    // Was not included, signal this as an unrelated character.
     return UNRELATED_CHAR;
 }
 
 inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, int snr, int inputIndex,
+        const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
         const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
         int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
-        bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
+        bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
         int *nextSiblingPosition) {
     if (DEBUG_DICT) {
         int inputCount = 0;
@@ -480,15 +651,16 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
         mWord[depth] = c;
         if (traverseAllNodes && terminal) {
             onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
-                    snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq);
+                    matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos,
+                    freq);
         }
         if (!needsToTraverseChildrenNodes) return false;
         *newTraverseAllNodes = traverseAllNodes;
-        *newSnr = snr;
+        *newMatchRate = matchWeight;
         *newDiffs = diffs;
         *newInputIndex = inputIndex;
     } else {
-        int *currentChars = mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS);
+        const int *currentChars = getInputCharsAt(inputIndex);
 
         if (transposedPos >= 0) {
             if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;
@@ -502,18 +674,18 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
         // If inputIndex is greater than mInputLength, that means there is no
         // proximity chars. So, we don't need to check proximity.
         if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
-            snr = snr * TYPED_LETTER_MULTIPLIER;
+            matchWeight = matchWeight * TYPED_LETTER_MULTIPLIER;
         }
         bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
                 || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
         if (isSameAsUserTypedLength && terminal) {
-            onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
+            onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight,
                     skipPos, excessivePos, transposedPos, freq);
         }
         if (!needsToTraverseChildrenNodes) return false;
         // Start traversing all nodes after the index exceeds the user typed length
         *newTraverseAllNodes = isSameAsUserTypedLength;
-        *newSnr = snr;
+        *newMatchRate = matchWeight;
         *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
         *newInputIndex = inputIndex + 1;
     }
@@ -591,14 +763,14 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
         const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,
         int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {
     const int inputIndex = startInputIndex + depth;
-    const int *currentChars = mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS);
+    const int *currentChars = getInputCharsAt(inputIndex);
     unsigned short c;
     *siblingPos = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, firstChildPos, &c,
             newChildPosition, newTerminal, newFreq);
     const unsigned int inputC = currentChars[0];
     if (DEBUG_DICT) assert(inputC <= U_SHORT_MAX);
-    const unsigned short lowerC = toLowerCase(c);
-    const bool matched = (inputC == lowerC || inputC == c);
+    const unsigned short baseLowerC = toBaseLowerCase(c);
+    const bool matched = (inputC == baseLowerC || inputC == c);
     const bool hasChild = *newChildPosition != 0;
     if (matched) {
         word[depth] = c;