8 files changed, 213 insertions, 155 deletions
diff --git a/native/src/basechars.h b/native/src/basechars.h
index 5a4406606..3843e11c5 100644
--- a/native/src/basechars.h
+++ b/native/src/basechars.h
@@ -1,3 +1,22 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BASECHARS_H
+#define LATINIME_BASECHARS_H
+
 /**
  * Table mapping most combined Latin, Greek, and Cyrillic characters
  * to their base characters.  If c is in range, BASE_CHARS[c] == c
@@ -170,3 +189,4 @@ static unsigned short BASE_CHARS[] = {
 
 // generated with:
 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
+#endif // LATINIME_BASECHARS_H
diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp
index d11aee28e..6ed4d0982 100644
--- a/native/src/bigram_dictionary.cpp
+++ b/native/src/bigram_dictionary.cpp
@@ -45,8 +45,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
 #ifdef FLAG_DBG
         char s[length + 1];
         for (int i = 0; i <= length; i++) s[i] = word[i];
-#endif
         LOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
+#endif
     }
 
     // Find the right insertion point
diff --git a/native/src/defines.h b/native/src/defines.h
index a516190af..bea83b2c5 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -18,8 +18,16 @@
 #ifndef LATINIME_DEFINES_H
 #define LATINIME_DEFINES_H
 
+#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#include <cutils/log.h>
+#else
+#define LOGE(fmt, ...)
+#define LOGI(fmt, ...)
+#endif
+
 #ifdef FLAG_DO_PROFILE
 // Profiler
+#include <cutils/log.h>
 #include <time.h>
 #define PROF_BUF_SIZE 100
 static double profile_buf[PROF_BUF_SIZE];
@@ -92,8 +100,7 @@ static void prof_out(void) {
 #define DEBUG_PROXIMITY_INFO true
 
 #else // FLAG_DBG
-#define LOGE(fmt, ...)
-#define LOGI(fmt, ...)
+
 #define DEBUG_DICT false
 #define DEBUG_DICT_FULL false
 #define DEBUG_SHOW_FOUND_WORD false
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 3dc577a56..73e03d8fd 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -17,7 +17,9 @@
 #ifndef LATINIME_DICTIONARY_H
 #define LATINIME_DICTIONARY_H
 
+#include "basechars.h"
 #include "bigram_dictionary.h"
+#include "char_utils.h"
 #include "defines.h"
 #include "proximity_info.h"
 #include "unigram_dictionary.h"
@@ -61,7 +63,7 @@ public:
     static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
             const int pos, unsigned short *c, int *childrenPosition,
             bool *terminal, int *freq);
-
+    static inline unsigned short toBaseLowerCase(unsigned short c);
     // TODO: delete this
     int getBigramPosition(unsigned short *word, int length);
 
@@ -156,6 +158,19 @@ inline int Dictionary::setDictionaryValues(const unsigned char *dict,
     return position;
 }
 
+
+// Maps c to its base character via BASE_CHARS (when c is inside the table), then lowercases.
+inline unsigned short Dictionary::toBaseLowerCase(unsigned short c) {
+    const bool inTable = c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0]);
+    unsigned short base = inTable ? BASE_CHARS[c] : c;
+    if (base > 127) {
+        base = latin_tolower(base);
+    } else if (base >= 'A' && base <= 'Z') {
+        base |= 32;
+    }
+    return base;
+}
+
 } // namespace latinime
 
 #endif // LATINIME_DICTIONARY_H
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index 209c31e6e..c45393f18 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -19,6 +19,7 @@
 
 #define LOG_TAG "LatinIME: proximity_info.cpp"
 
+#include "dictionary.h"
 #include "proximity_info.h"
 
 namespace latinime {
@@ -63,4 +64,101 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
     return false;
 }
 
+// TODO: Calculate nearby codes here.
+void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength) {
+    mInputCodes = inputCodes;  // keeps the caller's pointer; the codes are not copied
+    mInputLength = inputLength;  // later used as the upper bound for input positions
+}
+
+const int* ProximityInfo::getProximityCharsAt(const int index) const {  // public, so not inline
+    return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE);  // start of that position's codes
+}
+
+unsigned short ProximityInfo::getPrimaryCharAt(const int index) const {
+    return getProximityCharsAt(index)[0];  // first code of the slot, narrowed to unsigned short
+}
+
+// Returns true if c is one of the proximity codes stored for input position index.
+// A position holds at most MAX_PROXIMITY_CHARS_SIZE codes; a non-positive code ends the list.
+bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
+    const int *chars = getProximityCharsAt(index);
+    // Check the bound before reading chars[i] so a completely filled list is never overrun.
+    for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE && chars[i] > 0; ++i) {
+        if (chars[i] == c) return true;
+    }
+    return false;
+}
+
+// Tells whether the key typed at position index also appears in the proximity list of the
+// position just before it or just after it. Out-of-range positions yield false.
+bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
+    const bool inRange = index >= 0 && index < mInputLength;
+    if (!inRange) {
+        return false;
+    }
+    const int typed = getPrimaryCharAt(index);
+    // There is a left neighbour only when index is past the first position.
+    const bool nearLeft = index > 0 && existsCharInProximityAt(index - 1, typed);
+    // The right neighbour is consulted only if the left one did not match and it exists.
+    return nearLeft || (index + 1 < mInputLength && existsCharInProximityAt(index + 1, typed));
+}
+
+// In the following function, c is the current character of the dictionary word
+// currently examined.
+// currentChars is an array containing the keys close to the character the
+// user actually typed at the same position. We want to see if c is in it: if so,
+// then the word contains at that position a character close to what the user
+// typed.
+// What the user typed is actually the first character of the array.
+// Note: accented characters do not have a proximity list, so they are alone
+// in their list. The non-accented version of the character should be considered
+// "close", but not the other keys close to the non-accented version.
+ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
+        const int index, const unsigned short c, const int skipPos,
+        const int excessivePos, const int transposedPos) const {
+    const int *currentChars = getProximityCharsAt(index);
+    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+
+    // The first char in the array is what user typed. If it matches right away,
+    // that means the user typed that same char for this pos.
+    if (currentChars[0] == baseLowerC || currentChars[0] == c)
+        return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
+
+    // If one of those is true, we should not check for close characters at all.
+    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
+        return UNRELATED_CHAR;
+
+    // If the non-accented, lowercased version of that first character matches c,
+    // then we have a non-accented version of the accented character the user
+    // typed. Treat it as a close char.
+    if (Dictionary::toBaseLowerCase(currentChars[0]) == baseLowerC)
+        return NEAR_PROXIMITY_CHAR;
+
+    // Not an exact nor an accent-alike match: search the list of close keys.
+    // The bound is tested before currentChars[j] is read so a full list is never overrun.
+    for (int j = 1; j < MAX_PROXIMITY_CHARS_SIZE && currentChars[j] > 0; ++j) {
+        if (currentChars[j] == baseLowerC || currentChars[j] == c) {
+            return NEAR_PROXIMITY_CHAR;
+        }
+    }
+
+    // Was not included, signal this as an unrelated character.
+    return UNRELATED_CHAR;
+}
+
+// True when word, of the given length, matches the typed key of every input position.
+bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const {
+    if (length != mInputLength) return false;
+    // The typed key is the first code of each slot, so step through the input codes one
+    // whole slot (MAX_PROXIMITY_CHARS_SIZE entries) at a time.
+    const int *typed = mInputCodes;
+    for (int remaining = length; remaining != 0; --remaining) {
+        const unsigned int typedCode = (unsigned int) *typed;
+        const unsigned int wordCode = (unsigned int) *word++;
+        if (typedCode != wordCode) return false;
+        typed += MAX_PROXIMITY_CHARS_SIZE;
+    }
+    return true;
+}
+
 } // namespace latinime
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 327cd0940..435a60151 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -25,11 +25,26 @@ namespace latinime {
 
 class ProximityInfo {
 public:
+    typedef enum {                             // Used as a return value for character comparison
+        SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,  // Same char, possibly with different case or accent
+        NEAR_PROXIMITY_CHAR,                   // Nearby key, or base form of the accented input
+        UNRELATED_CHAR                         // It is an unrelated char
+    } ProximityType;
+
     ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth,
             const int keybaordHeight, const int gridWidth, const int gridHeight,
             const uint32_t *proximityCharsArray);
     ~ProximityInfo();
     bool hasSpaceProximity(const int x, const int y) const;
+    void setInputParams(const int* inputCodes, const int inputLength);
+    const int* getProximityCharsAt(const int index) const;
+    unsigned short getPrimaryCharAt(const int index) const;
+    bool existsCharInProximityAt(const int index, const int c) const;
+    bool existsAdjacentProximityChars(const int index) const;
+    ProximityType getMatchedProximityId(
+            const int index, const unsigned short c, const int skipPos,
+            const int excessivePos, const int transposedPos) const;
+    bool sameAsTyped(const unsigned short *word, int length) const;
 private:
     int getStartIndexFromCoordinates(const int x, const int y) const;
     const int MAX_PROXIMITY_CHARS_SIZE;
@@ -39,7 +54,9 @@ private:
     const int GRID_HEIGHT;
     const int CELL_WIDTH;
     const int CELL_HEIGHT;
+    const int *mInputCodes;
     uint32_t *mProximityCharsArray;
+    int mInputLength;
 };
 
 } // namespace latinime
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 698584e54..afa8bc545 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -20,7 +20,6 @@
 
 #define LOG_TAG "LatinIME: unigram_dictionary.cpp"
 
-#include "basechars.h"
 #include "char_utils.h"
 #include "dictionary.h"
 #include "unigram_dictionary.h"
@@ -54,7 +53,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
       // TODO : remove this variable.
     ROOT_POS(0),
 #endif // NEW_DICTIONARY_FORMAT
-    BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)),
+    BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
     MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
     if (DEBUG_DICT) {
         LOGI("UnigramDictionary - constructor");
@@ -93,7 +92,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
 // codesDest is the current point in the work buffer.
 // codesSrc is the current point in the user-input, original, content-unmodified buffer.
 // codesRemain is the remaining size in codesSrc.
-void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
+void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
         const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
         const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
@@ -143,7 +142,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *pr
             (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies);
 }
 
-int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
         const int *ycoordinates, const int *codes, const int codesSize, const int flags,
         unsigned short *outWords, int *frequencies) {
 
@@ -172,8 +171,8 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
             short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
             char s[MAX_WORD_LENGTH];
             for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
-#endif
             LOGI("%s %i", s, mFrequencies[j]);
+#endif
         }
         LOGI("Next letters: ");
         for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
@@ -187,13 +186,14 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
     return suggestedWordsCount;
 }
 
-void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
+void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
         unsigned short *outWords, int *frequencies) {
 
     PROF_OPEN;
     PROF_START(0);
-    initSuggestions(codes, codesSize, outWords, frequencies);
+    initSuggestions(
+            proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
     if (DEBUG_DICT) assert(codesSize == mInputLength);
 
     const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
@@ -275,16 +275,18 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
     PROF_END(6);
 }
 
-void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
+void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+        const int *ycoordinates, const int *codes, const int codesSize,
         unsigned short *outWords, int *frequencies) {
     if (DEBUG_DICT) {
         LOGI("initSuggest");
     }
     mFrequencies = frequencies;
     mOutputChars = outWords;
-    mInputCodes = codes;
     mInputLength = codesSize;
     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+    proximityInfo->setInputParams(codes, codesSize);
+    mProximityInfo = proximityInfo;
 }
 
 static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
@@ -301,8 +303,8 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
 #ifdef FLAG_DBG
         char s[length + 1];
         for (int i = 0; i <= length; i++) s[i] = word[i];
-#endif
         LOGI("Found word = %s, freq = %d", s, frequency);
+#endif
     }
     if (length > MAX_WORD_LENGTH) {
         if (DEBUG_DICT) {
@@ -325,8 +327,8 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
 #ifdef FLAG_DBG
             char s[length + 1];
             for (int i = 0; i <= length; i++) s[i] = word[i];
-#endif
             LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
+#endif
         }
         memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
                (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
@@ -348,33 +350,6 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
     return false;
 }
 
-static inline unsigned short toBaseLowerCase(unsigned short c) {
-    if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
-        c = BASE_CHARS[c];
-    }
-    if (c >='A' && c <= 'Z') {
-        c |= 32;
-    } else if (c > 127) {
-        c = latin_tolower(c);
-    }
-    return c;
-}
-
-bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
-    if (length != mInputLength) {
-        return false;
-    }
-    const int *inputCodes = mInputCodes;
-    while (length--) {
-        if ((unsigned int) *inputCodes != (unsigned int) *word) {
-            return false;
-        }
-        inputCodes += MAX_PROXIMITY_CHARS;
-        word++;
-    }
-    return true;
-}
-
 static const char QUOTE = '\'';
 static const char SPACE = ' ';
 
@@ -568,7 +543,9 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
             WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
     if (excessivePos >= 0) {
         multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
-        if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
+        if (!mProximityInfo->existsAdjacentProximityChars(inputIndex)) {
+            // If an excessive character is not adjacent to the left char or the right char,
+            // we will demote this word.
             multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
         }
     }
@@ -602,75 +579,11 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
 
 inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
         const int inputIndex, const int skipPos, const int depth) {
-    const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
+    const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(inputIndex);
     // Skip the ' or other letter and continue deeper
     return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
 }
 
-inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
-        const int inputLength) const {
-    if (inputIndex < 0 || inputIndex >= inputLength) return false;
-    const int currentChar = *getInputCharsAt(inputIndex);
-    const int leftIndex = inputIndex - 1;
-    if (leftIndex >= 0) {
-        const int *leftChars = getInputCharsAt(leftIndex);
-        int i = 0;
-        while (leftChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {
-            if (leftChars[i++] == currentChar) return true;
-        }
-    }
-    const int rightIndex = inputIndex + 1;
-    if (rightIndex < inputLength) {
-        const int *rightChars = getInputCharsAt(rightIndex);
-        int i = 0;
-        while (rightChars[i] > 0 && i < MAX_PROXIMITY_CHARS) {
-            if (rightChars[i++] == currentChar) return true;
-        }
-    }
-    return false;
-}
-
-// In the following function, c is the current character of the dictionary word
-// currently examined.
-// currentChars is an array containing the keys close to the character the
-// user actually typed at the same position. We want to see if c is in it: if so,
-// then the word contains at that position a character close to what the user
-// typed.
-// What the user typed is actually the first character of the array.
-// Notice : accented characters do not have a proximity list, so they are alone
-// in their list. The non-accented version of the character should be considered
-// "close", but not the other keys close to the non-accented version.
-inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(
-        const int *currentChars, const unsigned short c, const int skipPos,
-        const int excessivePos, const int transposedPos) {
-    const unsigned short baseLowerC = toBaseLowerCase(c);
-
-    // The first char in the array is what user typed. If it matches right away,
-    // that means the user typed that same char for this pos.
-    if (currentChars[0] == baseLowerC || currentChars[0] == c)
-        return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
-
-    // If one of those is true, we should not check for close characters at all.
-    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
-        return UNRELATED_CHAR;
-
-    // If the non-accented, lowercased version of that first character matches c,
-    // then we have a non-accented version of the accented character the user
-    // typed. Treat it as a close char.
-    if (toBaseLowerCase(currentChars[0]) == baseLowerC)
-        return NEAR_PROXIMITY_CHAR;
-
-    // Not an exact nor an accent-alike match: search the list of close keys
-    int j = 1;
-    while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
-        const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
-        if (matched) return NEAR_PROXIMITY_CHAR;
-        ++j;
-    }
-
-    // Was not included, signal this as an unrelated character.
-    return UNRELATED_CHAR;
-}
 
 inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
         const uint8_t* const root, const uint8_t flags, const int pos,
@@ -678,7 +591,7 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de
         const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
         int* nextLetters, const int nextLettersSize) {
 
-    const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
+    const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
     if (isSameAsTyped) return;
 
     if (depth >= MIN_SUGGEST_DEPTH) {
@@ -809,9 +722,9 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
                             char s[inputLength + 1];
                             for (int i = 0; i < inputLength; ++i) s[i] = word[i];
                             s[inputLength] = 0;
-#endif
                             LOGI("New missing space word found: %d > %d (%s), %d, %d",
                                     newFreq, maxFreq, s, inputLength, depth);
+#endif
                         }
                         maxFreq = newFreq;
                     }
@@ -836,15 +749,14 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
         const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,
         int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {
     const int inputIndex = startInputIndex + depth;
-    const int *currentChars = getInputCharsAt(inputIndex);
     unsigned short c;
     *siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,
             &c, newChildPosition, newTerminal, newFreq);
-    const unsigned int inputC = currentChars[0];
+    const unsigned int inputC = mProximityInfo->getPrimaryCharAt(inputIndex);
     if (DEBUG_DICT) {
         assert(inputC <= U_SHORT_MAX);
     }
-    const unsigned short baseLowerC = toBaseLowerCase(c);
+    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
     const bool matched = (inputC == baseLowerC || inputC == c);
     const bool hasChild = *newChildPosition != 0;
     if (matched) {
@@ -962,20 +874,20 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
         *newDiffs = diffs;
         *newInputIndex = inputIndex;
     } else {
-        const int *currentChars = getInputCharsAt(inputIndex);
+        int inputIndexForProximity = inputIndex;
 
         if (transposedPos >= 0) {
-            if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;
-            if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;
+            if (inputIndex == transposedPos) ++inputIndexForProximity;
+            if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
         }
 
-        int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,
-                transposedPos);
-        if (UNRELATED_CHAR == matchedProximityCharId) return false;
+        ProximityInfo::ProximityType matchedProximityCharId = mProximityInfo->getMatchedProximityId(
+                inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
+        if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) return false;
         mWord[depth] = c;
         // If inputIndex is greater than mInputLength, that means there is no
         // proximity chars. So, we don't need to check proximity.
-        if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+        if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
             multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
         }
         bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
@@ -988,7 +900,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
         // Start traversing all nodes after the index exceeds the user typed length
         *newTraverseAllNodes = isSameAsUserTypedLength;
         *newMatchRate = matchWeight;
-        *newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
+        *newDiffs = diffs
+                + ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
         *newInputIndex = inputIndex + 1;
     }
     // Optimization: Prune out words that are too long compared to how much was typed.
@@ -1017,7 +930,7 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
     uint16_t inWord[inputLength];
 
     for (int i = 0; i < inputLength; ++i) {
-        inWord[i] = *getInputCharsAt(startInputIndex + i);
+        inWord[i] = (uint16_t)mProximityInfo->getPrimaryCharAt(startInputIndex + i);
     }
     return getMostFrequentWordLikeInner(inWord, inputLength, word);
 }
@@ -1041,8 +954,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
     const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
     int pos = startPos;
     int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
-    int32_t baseChar = toBaseLowerCase(character);
-    const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
+    int32_t baseChar = Dictionary::toBaseLowerCase(character);
+    const uint16_t wChar = Dictionary::toBaseLowerCase(inWord[startInputIndex]);
 
     if (baseChar != wChar) {
         *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -1054,8 +967,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
     if (hasMultipleChars) {
         character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
         while (NOT_A_CHARACTER != character) {
-            baseChar = toBaseLowerCase(character);
-            if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+            baseChar = Dictionary::toBaseLowerCase(character);
+            if (Dictionary::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
                 *outPos = BinaryFormat::skipOtherCharacters(root, pos);
                 *outInputIndex = startInputIndex;
                 return false;
@@ -1300,7 +1213,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
         const bool hasChildren = (!isLastChar) || BinaryFormat::hasChildrenInFlags(flags);
 
         // This has to be done for each virtual char (this forwards the "inputIndex" which
-        // is the index in the user-inputted chars, as read by getInputCharsAt.
+        // is the index in the user-inputted chars, as read by proximity chars.
         if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
         if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
             mWord[depth] = c;
@@ -1324,16 +1237,16 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
                 return false;
             }
         } else {
-            const int *currentChars = getInputCharsAt(inputIndex);
+            int inputIndexForProximity = inputIndex;
 
             if (transposedPos >= 0) {
-                if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;
-                if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;
+                if (inputIndex == transposedPos) ++inputIndexForProximity;
+                if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
             }
 
-            const int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos,
-                    excessivePos, transposedPos);
-            if (UNRELATED_CHAR == matchedProximityCharId) {
+            int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
+                    inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
+            if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
                 // We found that this is an unrelated character, so we should give up traversing
                 // this node and its children entirely.
                 // However we may not be on the last virtual node yet so we skip the remaining
@@ -1352,7 +1265,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
             mWord[depth] = c;
             // If inputIndex is greater than mInputLength, that means there is no
             // proximity chars. So, we don't need to check proximity.
-            if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+            if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
                 multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
             }
             const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
@@ -1376,7 +1289,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
             }
             // Start traversing all nodes after the index exceeds the user typed length
             traverseAllNodes = isSameAsUserTypedLength;
-            diffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
+            diffs = diffs
+                    + ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
             // Finally, we are ready to go to the next character, the next "virtual node".
             // We should advance the input index.
             // We do this in this branch of the 'if traverseAllNodes' because we are still matching
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index dcc8f2a9a..f6045c6ef 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -29,12 +29,6 @@ namespace latinime {
 
 class UnigramDictionary {
 
-    typedef enum {                             // Used as a return value for character comparison
-        SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,  // Same char, possibly with different case or accent
-        NEAR_PROXIMITY_CHAR,                   // It is a char located nearby on the keyboard
-        UNRELATED_CHAR                         // It is an unrelated char
-    } ProximityType;
-
 public:
 #ifdef NEW_DICTIONARY_FORMAT
 
@@ -82,26 +76,26 @@ public:
             int maxAlternatives);
 #endif // NEW_DICTIONARY_FORMAT
     int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
-    int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+    int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int codesSize, const int flags,
             unsigned short *outWords, int *frequencies);
     ~UnigramDictionary();
 
 private:
-    void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
+    void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int codesSize,
             unsigned short *outWords, int *frequencies);
     bool isDigraph(const int* codes, const int i, const int codesSize) const;
-    void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo,
+    void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
         const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
         const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
         const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
-    void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords,
-            int *frequencies);
+    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+            const int *ycoordinates, const int *codes, const int codesSize,
+            unsigned short *outWords, int *frequencies);
     void getSuggestionCandidates(const int skipPos, const int excessivePos,
             const int transposedPos, int *nextLetters, const int nextLettersSize,
             const int maxDepth);
-    bool sameAsTyped(const unsigned short *word, int length) const;
     bool addWord(unsigned short *word, int length, int frequency);
     bool getSplitTwoWordsSuggestion(const int inputLength,
             const int firstWordStartPos, const int firstWordLength,
@@ -118,8 +112,6 @@ private:
             int *nextLetters, const int nextLettersSize);
     bool needsToSkipCurrentNode(const unsigned short c,
             const int inputIndex, const int skipPos, const int depth);
-    ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
-            const int skipPos, const int excessivePos, const int transposedPos);
     // Process a node by considering proximity, missing and excessive character
     bool processCurrentNode(const int initialPos, const int initialDepth,
             const int maxDepth, const bool initialTraverseAllNodes, const int snr, int inputIndex,
@@ -127,10 +119,6 @@ private:
             const int transposedPos, int *nextLetters, const int nextLettersSize, int *newCount,
             int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex,
             int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
-    bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
-    inline const int* getInputCharsAt(const int index) const {
-        return mInputCodes + (index * MAX_PROXIMITY_CHARS);
-    }
     int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
             unsigned short *word);
 #ifndef NEW_DICTIONARY_FORMAT
@@ -174,7 +162,7 @@ private:
 
     int *mFrequencies;
     unsigned short *mOutputChars;
-    const int *mInputCodes;
+    const ProximityInfo *mProximityInfo;
     int mInputLength;
     // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
@@ -189,7 +177,6 @@ private:
     int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
     int mNextLettersFrequency[NEXT_LETTERS_SIZE];
 };
-
 } // namespace latinime
 
 #endif // LATINIME_UNIGRAM_DICTIONARY_H