8 files changed, 359 insertions, 125 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 6d682c0c9..a4090a966 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -21,6 +21,7 @@
 #define LOG_TAG "LatinIME: correction.cpp"
 
 #include "correction.h"
+#include "dictionary.h"
 #include "proximity_info.h"
 
 namespace latinime {
@@ -49,12 +50,21 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
     mInputLength = inputLength;
     mMaxDepth = maxDepth;
     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
-    mSkippedOutputIndex = -1;
+}
+
+void Correction::initCorrectionState(
+        const int rootPos, const int childCount, const bool traverseAll) {
+    latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
+    // TODO: remove
+    mCorrectionStates[0].mSkipPos = mSkipPos;
 }
 
 void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
         const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
+    // TODO: remove
     mSkipPos = skipPos;
+    // TODO: remove
+    mCorrectionStates[0].mSkipPos = skipPos;
     mExcessivePos = excessivePos;
     mTransposedPos = transposedPos;
     mSpaceProximityPos = spaceProximityPos;
@@ -83,33 +93,37 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
     if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
         return -1;
     }
+
     *word = mWord;
     const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
             : (mInputLength == inputIndex + 1);
     return Correction::RankingAlgorithm::calculateFinalFreq(
-            inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
+            inputIndex, outputIndex, freq, sameLength, mEditDistanceTable, this);
 }
 
-void Correction::initProcessState(const int matchCount, const int inputIndex,
-        const int outputIndex, const bool traverseAllNodes, const int diffs) {
-    mMatchedCharCount = matchCount;
-    mInputIndex = inputIndex;
+bool Correction::initProcessState(const int outputIndex) {
+    if (mCorrectionStates[outputIndex].mChildCount <= 0) {
+        return false;
+    }
     mOutputIndex = outputIndex;
-    mTraverseAllNodes = traverseAllNodes;
-    mDiffs = diffs;
-}
-
-void Correction::getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
-        bool *traverseAllNodes, int *diffs) {
-    *matchedCount = mMatchedCharCount;
-    *inputIndex = mInputIndex;
-    *outputIndex = mOutputIndex;
-    *traverseAllNodes = mTraverseAllNodes;
-    *diffs = mDiffs;
+    --(mCorrectionStates[outputIndex].mChildCount);
+    mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
+    mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
+    mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
+    mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
+    mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
+    mSkipping = false;
+    mProximityMatching = false;
+    mMatching = false;
+    return true;
 }
 
-void Correction::charMatched() {
-    ++mMatchedCharCount;
+int Correction::goDownTree(
+        const int parentIndex, const int childCount, const int firstChildPos) {
+    mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
+    mCorrectionStates[mOutputIndex].mChildCount = childCount;
+    mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
+    return mOutputIndex;
 }
 
 // TODO: remove
@@ -123,8 +137,8 @@ int Correction::getInputIndex() {
 }
 
 // TODO: remove
-bool Correction::needsToTraverseAll() {
-    return mTraverseAllNodes;
+bool Correction::needsToTraverseAllNodes() {
+    return mNeedsToTraverseAllNodes;
 }
 
 void Correction::incrementInputIndex() {
@@ -133,21 +147,32 @@ void Correction::incrementInputIndex() {
 
 void Correction::incrementOutputIndex() {
     ++mOutputIndex;
+    mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
+    mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
+    mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
+    mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
+    mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
+    mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
+    mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
+    mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
+    mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
+    mCorrectionStates[mOutputIndex].mMatching = mMatching;
+    mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
 }
 
-void Correction::startTraverseAll() {
-    mTraverseAllNodes = true;
+void Correction::startToTraverseAllNodes() {
+    mNeedsToTraverseAllNodes = true;
 }
 
 bool Correction::needsToPrune() const {
     return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
-            || mDiffs > mMaxEditDistance);
+            || mProximityCount > mMaxEditDistance);
 }
 
 Correction::CorrectionType Correction::processSkipChar(
         const int32_t c, const bool isTerminal) {
     mWord[mOutputIndex] = c;
-    if (needsToTraverseAll() && isTerminal) {
+    if (needsToTraverseAllNodes() && isTerminal) {
         mTerminalInputIndex = mInputIndex;
         mTerminalOutputIndex = mOutputIndex;
         incrementOutputIndex();
@@ -169,10 +194,31 @@ Correction::CorrectionType Correction::processCharAndCalcState(
 
     bool skip = false;
     if (mSkipPos >= 0) {
+        if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
+            if (DEBUG_DICT) {
+                assert(mSkipPos == mOutputIndex - 1);
+            }
+            ++mSkipPos;
+        }
         skip = mSkipPos == mOutputIndex;
+        mSkipping = true;
     }
 
-    if (mTraverseAllNodes || isQuote(c)) {
+    if (mNeedsToTraverseAllNodes || isQuote(c)) {
+        const bool checkProximityChars =
+                !(mSkippedCount > 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
+        // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
+        // proximity chars of "s", but it should rather be handled as a skipped char.
+        if (checkProximityChars
+                && mInputIndex > 0
+                && mCorrectionStates[mOutputIndex].mProximityMatching
+                && mCorrectionStates[mOutputIndex].mSkipping
+                && mProximityInfo->getMatchedProximityId(
+                        mInputIndex - 1, c, false)
+                        == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+            ++mSkippedCount;
+            --mProximityCount;
+        }
         return processSkipChar(c, isTerminal);
     } else {
         int inputIndexForProximity = mInputIndex;
@@ -186,40 +232,40 @@ Correction::CorrectionType Correction::processCharAndCalcState(
             }
         }
 
+        // TODO: sum counters
         const bool checkProximityChars =
-                !(mSkipPos >= 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
+                !(mSkippedCount > 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
         int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
                 inputIndexForProximity, c, checkProximityChars);
 
-        const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
-        if (unrelated) {
-            if (skip) {
+        if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
+            if (skip && mProximityCount == 0) {
                 // Skip this letter and continue deeper
-                mSkippedOutputIndex = mOutputIndex;
+                ++mSkippedCount;
+                return processSkipChar(c, isTerminal);
+            } else if (checkProximityChars
+                    && inputIndexForProximity > 0
+                    && mCorrectionStates[mOutputIndex].mProximityMatching
+                    && mCorrectionStates[mOutputIndex].mSkipping
+                    && mProximityInfo->getMatchedProximityId(
+                            inputIndexForProximity - 1, c, false)
+                                    == ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
+                ++mSkippedCount;
+                --mProximityCount;
                 return processSkipChar(c, isTerminal);
             } else {
                 return UNRELATED;
             }
-        }
-
-        // No need to skip. Finish traversing and increment skipPos.
-        // TODO: Remove this?
-        if (skip) {
-            mWord[mOutputIndex] = c;
-            incrementOutputIndex();
-            return TRAVERSE_ALL_NOT_ON_TERMINAL;
+        } else if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+            // If inputIndex is greater than mInputLength, that means there is no
+            // proximity chars. So, we don't need to check proximity.
+            mMatching = true;
+        } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+            mProximityMatching = true;
+            incrementProximityCount();
         }
 
         mWord[mOutputIndex] = c;
-        // If inputIndex is greater than mInputLength, that means there is no
-        // proximity chars. So, we don't need to check proximity.
-        if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
-            charMatched();
-        }
-
-        if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
-            incrementDiffs();
-        }
 
         const bool isSameAsUserTypedLength = mInputLength
                 == getInputIndex() + 1
@@ -232,7 +278,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
         }
         // Start traversing all nodes after the index exceeds the user typed length
         if (isSameAsUserTypedLength) {
-            startTraverseAll();
+            startToTraverseAllNodes();
         }
 
         // Finally, we are ready to go to the next character, the next "virtual node".
@@ -298,26 +344,117 @@ inline static void multiplyRate(const int rate, int *freq) {
     }
 }
 
+/* static */
+inline static int editDistance(
+        int* editDistanceTable, const unsigned short* input,
+        const int inputLength, const unsigned short* output, const int outputLength) {
+    // dp[li][lo] dp[a][b] = dp[ a * lo + b]
+    int* dp = editDistanceTable;
+    const int li = inputLength + 1;
+    const int lo = outputLength + 1;
+    for (int i = 0; i < li; ++i) {
+        dp[lo * i] = i;
+    }
+    for (int i = 0; i < lo; ++i) {
+        dp[i] = i;
+    }
+
+    for (int i = 0; i < li - 1; ++i) {
+        for (int j = 0; j < lo - 1; ++j) {
+            const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
+            const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
+            const uint16_t cost = (ci == co) ? 0 : 1;
+            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+            if (li > 0 && lo > 0
+                    && ci == Dictionary::toBaseLowerCase(output[j - 1])
+                    && co == Dictionary::toBaseLowerCase(input[i - 1])) {
+                dp[(i + 1) * lo + (j + 1)] = min(
+                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+            }
+        }
+    }
+
+    if (DEBUG_EDIT_DISTANCE) {
+        LOGI("IN = %d, OUT = %d", inputLength, outputLength);
+        for (int i = 0; i < li; ++i) {
+            for (int j = 0; j < lo; ++j) {
+                LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+            }
+        }
+    }
+    return dp[li * lo - 1];
+}
+
 //////////////////////
 // RankingAlgorithm //
 //////////////////////
 
-int Correction::RankingAlgorithm::calculateFinalFreq(
-        const int inputIndex, const int outputIndex,
-        const int matchCount, const int freq, const bool sameLength,
+/* static */
+int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
+        const int freq, const bool sameLength, int* editDistanceTable,
         const Correction* correction) {
-    const int skipPos = correction->getSkipPos();
     const int excessivePos = correction->getExcessivePos();
     const int transposedPos = correction->getTransposedPos();
     const int inputLength = correction->mInputLength;
     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
     const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
     const ProximityInfo *proximityInfo = correction->mProximityInfo;
-    const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
+    const int skipCount = correction->mSkippedCount;
+    const int proximityMatchedCount = correction->mProximityCount;
+
+    // TODO: use mExcessiveCount
+    int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
+
+    const unsigned short* word = correction->mWord;
+    const bool skipped = skipCount > 0;
+
+    // ----- TODO: use edit distance here as follows? ---------------------- /
+    //if (!skipped && excessivePos < 0 && transposedPos < 0) {
+    //    const int ed = editDistance(dp, proximityInfo->getInputWord(),
+    //            inputLength, word, outputIndex + 1);
+    //    matchCount = outputIndex + 1 - ed;
+    //    if (ed == 1 && !sameLength) ++matchCount;
+    //}
+    //    const int ed = editDistance(dp, proximityInfo->getInputWord(),
+    //    inputLength, word, outputIndex + 1);
+    //    if (ed == 1 && !sameLength) ++matchCount; ------------------------ /
+    int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
 
     // TODO: Demote by edit distance
     int finalFreq = freq * matchWeight;
-    if (skipPos >= 0) {
+    // +1 +11/-12
+    /*if (inputLength == outputIndex && !skipped && excessivePos < 0 && transposedPos < 0) {
+        const int ed = editDistance(dp, proximityInfo->getInputWord(),
+                inputLength, word, outputIndex + 1);
+        if (ed == 1) {
+            multiplyRate(160, &finalFreq);
+        }
+    }*/
+    if (inputLength == outputIndex && excessivePos < 0 && transposedPos < 0
+            && (proximityMatchedCount > 0 || skipped)) {
+        const int ed = editDistance(editDistanceTable, proximityInfo->getPrimaryInputWord(),
+                inputLength, word, outputIndex + 1);
+        if (ed == 1) {
+            multiplyRate(160, &finalFreq);
+        }
+    }
+
+    // TODO: Promote properly?
+    //if (skipCount == 1 && excessivePos < 0 && transposedPos < 0 && inputLength == outputIndex
+    //        && !sameLength) {
+    //    multiplyRate(150, &finalFreq);
+    //}
+    //if (skipCount == 0 && excessivePos < 0 && transposedPos < 0 && inputLength == outputIndex
+    //        && !sameLength) {
+    //    multiplyRate(150, &finalFreq);
+    //}
+    //if (skipCount == 0 && excessivePos < 0 && transposedPos < 0
+    //        && inputLength == outputIndex + 1) {
+    //    multiplyRate(150, &finalFreq);
+    //}
+
+    if (skipped) {
         if (inputLength >= 2) {
             const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
                     * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
@@ -351,10 +488,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
             }
             multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
         }
-        if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
+        if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
         }
-    } else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0
+    } else if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0
             && outputIndex > 0) {
         // A word with proximity corrections
         if (DEBUG_DICT) {
@@ -363,13 +500,34 @@ int Correction::RankingAlgorithm::calculateFinalFreq(
         multiplyIntCapped(typedLetterMultiplier, &finalFreq);
         multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
     }
-    if (DEBUG_DICT) {
+    if (DEBUG_DICT_FULL) {
         LOGI("calc: %d, %d", outputIndex, sameLength);
     }
     if (sameLength) multiplyIntCapped(fullWordMultiplier, &finalFreq);
+
+    // TODO: check excessive count and transposed count
+    /*
+     If the last character of the user input word is the same as the next character
+     of the output word, and also all of characters of the user input are matched
+     to the output word, we'll promote that word a bit because
+     that word can be considered the combination of skipped and matched characters.
+     This means that the 'sm' pattern wins over the 'ma' pattern.
+     e.g.)
+     shel -> shell [mmmma] or [mmmsm]
+     hel -> hello [mmmaa] or [mmsma]
+     m ... matching
+     s ... skipping
+     a ... traversing all
+     */
+    if (matchCount == inputLength && matchCount >= 2 && !skipped
+            && word[matchCount] == word[matchCount - 1]) {
+        multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
+    }
+
     return finalFreq;
 }
 
+/* static */
 int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
         const int firstFreq, const int secondFreq, const Correction* correction) {
     const int spaceProximityPos = correction->mSpaceProximityPos;
diff --git a/native/src/correction.h b/native/src/correction.h
index ae6c7a421..9d385a44e 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -18,6 +18,7 @@
 #define LATINIME_CORRECTION_H
 
 #include <stdint.h>
+#include "correction_state.h"
 
 #include "defines.h"
 
@@ -39,16 +40,16 @@ public:
     Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
     void initCorrection(
             const ProximityInfo *pi, const int inputLength, const int maxWordLength);
+    void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
+
+    // TODO: remove
     void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
             const int spaceProximityPos, const int missingSpacePos);
     void checkState();
-    void initProcessState(const int matchCount, const int inputIndex, const int outputIndex,
-            const bool traverseAllNodes, const int diffs);
-    void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
-            bool *traverseAllNodes, int *diffs);
+    bool initProcessState(const int index);
+
     int getOutputIndex();
     int getInputIndex();
-    bool needsToTraverseAll();
 
     virtual ~Correction();
     int getSpaceProximityPos() const {
@@ -77,52 +78,68 @@ public:
 
     CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
 
-    int getDiffs() const {
-        return mDiffs;
+    /////////////////////////
+    // Tree helper methods
+    int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
+
+    inline int getTreeSiblingPos(const int index) const {
+        return mCorrectionStates[index].mSiblingPos;
+    }
+
+    inline void setTreeSiblingPos(const int index, const int pos) {
+        mCorrectionStates[index].mSiblingPos = pos;
+    }
+
+    inline int getTreeParentIndex(const int index) const {
+        return mCorrectionStates[index].mParentIndex;
     }
 private:
-    void charMatched();
-    void incrementInputIndex();
-    void incrementOutputIndex();
-    void startTraverseAll();
+    inline void incrementInputIndex();
+    inline void incrementOutputIndex();
+    inline bool needsToTraverseAllNodes();
+    inline void startToTraverseAllNodes();
+    inline bool isQuote(const unsigned short c);
+    inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
 
     // TODO: remove
-
-    void incrementDiffs() {
-        ++mDiffs;
+    inline void incrementProximityCount() {
+        ++mProximityCount;
     }
 
     const int TYPED_LETTER_MULTIPLIER;
     const int FULL_WORD_MULTIPLIER;
-
     const ProximityInfo *mProximityInfo;
 
     int mMaxEditDistance;
     int mMaxDepth;
     int mInputLength;
-    int mSkipPos;
-    int mSkippedOutputIndex;
     int mExcessivePos;
     int mTransposedPos;
     int mSpaceProximityPos;
     int mMissingSpacePos;
-
-    int mMatchedCharCount;
-    int mInputIndex;
-    int mOutputIndex;
     int mTerminalInputIndex;
     int mTerminalOutputIndex;
-    int mDiffs;
-    bool mTraverseAllNodes;
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+    // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
+    int mEditDistanceTable[MAX_WORD_LENGTH_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
 
-    inline bool isQuote(const unsigned short c);
-    inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
+    CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
+
+    // The following member variables are being used as cache values of the correction state.
+    int mOutputIndex;
+    int mInputIndex;
+    int mProximityCount;
+    int mSkippedCount;
+    int mSkipPos;
+    bool mNeedsToTraverseAllNodes;
+    bool mMatching;
+    bool mSkipping;
+    bool mProximityMatching;
 
     class RankingAlgorithm {
     public:
         static int calculateFinalFreq(const int inputIndex, const int depth,
-                const int matchCount, const int freq, const bool sameLength,
+                const int freq, const bool sameLength, int *editDistanceTable,
                 const Correction* correction);
         static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
                 const Correction* correction);
diff --git a/native/src/correction_state.h b/native/src/correction_state.h
new file mode 100644
index 000000000..267deda9b
--- /dev/null
+++ b/native/src/correction_state.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_CORRECTION_STATE_H
+#define LATINIME_CORRECTION_STATE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+struct CorrectionState {
+    int mParentIndex;
+    int mSiblingPos;
+    uint16_t mChildCount;
+    uint8_t mInputIndex;
+    uint8_t mProximityCount;
+    uint8_t mSkippedCount;
+    int8_t mSkipPos; // should be signed
+    bool mMatching;
+    bool mSkipping;
+    bool mProximityMatching;
+    bool mNeedsToTraverseAllNodes;
+
+};
+
+inline static void initCorrectionState(CorrectionState *state, const int rootPos,
+        const uint16_t childCount, const bool traverseAll) {
+    state->mParentIndex = -1;
+    state->mChildCount = childCount;
+    state->mInputIndex = 0;
+    state->mProximityCount = 0;
+    state->mSiblingPos = rootPos;
+    state->mSkippedCount = 0;
+    state->mMatching = false;
+    state->mSkipping = false;
+    state->mProximityMatching = false;
+    state->mNeedsToTraverseAllNodes = traverseAll;
+    state->mSkipPos = -1;
+}
+
+} // namespace latinime
+#endif // LATINIME_CORRECTION_STATE_H
diff --git a/native/src/defines.h b/native/src/defines.h
index 5a5d3ee0c..c1d08e695 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -94,20 +94,36 @@ static void prof_out(void) {
 #endif
 #define DEBUG_DICT true
 #define DEBUG_DICT_FULL false
+#define DEBUG_EDIT_DISTANCE false
 #define DEBUG_SHOW_FOUND_WORD DEBUG_DICT_FULL
 #define DEBUG_NODE DEBUG_DICT_FULL
 #define DEBUG_TRACE DEBUG_DICT_FULL
 #define DEBUG_PROXIMITY_INFO true
 
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
+
+static char charBuf[50];
+
+static void dumpWord(const unsigned short* word, const int length) {
+    for (int i = 0; i < length; ++i) {
+        charBuf[i] = word[i];
+    }
+    charBuf[length] = 0;
+    LOGI("[ %s ]", charBuf);
+}
+
 #else // FLAG_DBG
 
 #define DEBUG_DICT false
 #define DEBUG_DICT_FULL false
+#define DEBUG_EDIT_DISTANCE false
 #define DEBUG_SHOW_FOUND_WORD false
 #define DEBUG_NODE false
 #define DEBUG_TRACE false
 #define DEBUG_PROXIMITY_INFO false
 
+#define DUMP_WORD(word, length)
+
 #endif // FLAG_DBG
 
 #ifndef U_SHORT_MAX
@@ -160,6 +176,7 @@ static void prof_out(void) {
 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
 #define FULL_MATCHED_WORDS_PROMOTION_RATE 120
 #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
+#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
 
 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
 // This is only used for the size of array. Not to be used in c functions.
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index d437e251a..361bdacbf 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -68,6 +68,10 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
 void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength) {
     mInputCodes = inputCodes;
     mInputLength = inputLength;
+    for (int i = 0; i < inputLength; ++i) {
+        mPrimaryInputWord[i] = getPrimaryCharAt(i);
+    }
+    mPrimaryInputWord[inputLength] = 0;
 }
 
 inline const int* ProximityInfo::getProximityCharsAt(const int index) const {
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 5034c3b89..75fc8fb63 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -46,6 +46,10 @@ public:
     ProximityType getMatchedProximityId(
             const int index, const unsigned short c, const bool checkProximityChars) const;
     bool sameAsTyped(const unsigned short *word, int length) const;
+    const unsigned short* getPrimaryInputWord() const {
+        return mPrimaryInputWord;
+    }
+
 private:
     int getStartIndexFromCoordinates(const int x, const int y) const;
     const int MAX_PROXIMITY_CHARS_SIZE;
@@ -58,6 +62,7 @@ private:
     const int *mInputCodes;
     uint32_t *mProximityCharsArray;
     int mInputLength;
+    unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
 };
 
 } // namespace latinime
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index df1a2e273..6bc350505 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -187,20 +187,15 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
     mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
     PROF_END(0);
 
+    // TODO: remove
     PROF_START(1);
-    getSuggestionCandidates(-1, -1, -1);
+    // Note: This line is intentionally left blank
     PROF_END(1);
 
     PROF_START(2);
     // Suggestion with missing character
-    if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
-        for (int i = 0; i < codesSize; ++i) {
-            if (DEBUG_DICT) {
-                LOGI("--- Suggest missing characters %d", i);
-            }
-            getSuggestionCandidates(i, -1, -1);
-        }
-    }
+    LOGI("--- Suggest missing characters");
+    getSuggestionCandidates(0, -1, -1);
     PROF_END(2);
 
     PROF_START(3);
@@ -352,44 +347,28 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
     int rootPosition = ROOT_POS;
     // Get the number of children of root, then increment the position
     int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
-    int depth = 0;
+    int outputIndex = 0;
 
-    mStackChildCount[0] = childCount;
-    mStackTraverseAll[0] = (mInputLength <= 0);
-    mStackInputIndex[0] = 0;
-    mStackDiffs[0] = 0;
-    mStackSiblingPos[0] = rootPosition;
-    mStackOutputIndex[0] = 0;
-    mStackMatchedCount[0] = 0;
+    mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0));
 
     // Depth first search
-    while (depth >= 0) {
-        if (mStackChildCount[depth] > 0) {
-            --mStackChildCount[depth];
-            int siblingPos = mStackSiblingPos[depth];
+    while (outputIndex >= 0) {
+        if (mCorrection->initProcessState(outputIndex)) {
+            int siblingPos = mCorrection->getTreeSiblingPos(outputIndex);
             int firstChildPos;
-            mCorrection->initProcessState(
-                    mStackMatchedCount[depth], mStackInputIndex[depth], mStackOutputIndex[depth],
-                    mStackTraverseAll[depth], mStackDiffs[depth]);
 
-            // needsToTraverseChildrenNodes should be false
             const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
                     mCorrection, &childCount, &firstChildPos, &siblingPos);
             // Update next sibling pos
-            mStackSiblingPos[depth] = siblingPos;
+            mCorrection->setTreeSiblingPos(outputIndex, siblingPos);
+
             if (needsToTraverseChildrenNodes) {
                 // Goes to child node
-                ++depth;
-                mStackChildCount[depth] = childCount;
-                mStackSiblingPos[depth] = firstChildPos;
-
-                mCorrection->getProcessState(&mStackMatchedCount[depth],
-                        &mStackInputIndex[depth], &mStackOutputIndex[depth],
-                        &mStackTraverseAll[depth], &mStackDiffs[depth]);
+                outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos);
             }
         } else {
             // Goes to parent sibling node
-            --depth;
+            outputIndex = mCorrection->getTreeParentIndex(outputIndex);
         }
     }
 }
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 8bcd7cea5..cfe63ff79 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 #include "correction.h"
+#include "correction_state.h"
 #include "defines.h"
 #include "proximity_info.h"
 
@@ -134,13 +135,9 @@ private:
     // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
 
-    int mStackMatchedCount[MAX_WORD_LENGTH_INTERNAL];
-    int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];
-    bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
-    int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
-    int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
-    int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
-    int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
+    int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
+    int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
+    int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
 };
 } // namespace latinime