12 files changed, 1134 insertions, 947 deletions
diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp
index 6ed4d0982..c340c6c1a 100644
--- a/native/src/bigram_dictionary.cpp
+++ b/native/src/bigram_dictionary.cpp
@@ -21,13 +21,14 @@
 
 #include "bigram_dictionary.h"
 #include "dictionary.h"
+#include "binary_format.h"
 
 namespace latinime {
 
 BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
         int maxAlternatives, const bool isLatestDictVersion, const bool hasBigram,
         Dictionary *parentDictionary)
-    : DICT(dict), MAX_WORD_LENGTH(maxWordLength),
+    : DICT(dict + NEW_DICTIONARY_HEADER_SIZE), MAX_WORD_LENGTH(maxWordLength),
     MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion),
     HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) {
     if (DEBUG_DICT) {
@@ -82,169 +83,64 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
     return false;
 }
 
-int BigramDictionary::getBigramAddress(int *pos, bool advance) {
-    int address = 0;
-
-    address += (DICT[*pos] & 0x3F) << 16;
-    address += (DICT[*pos + 1] & 0xFF) << 8;
-    address += (DICT[*pos + 2] & 0xFF);
-
-    if (advance) {
-        *pos += 3;
-    }
-
-    return address;
-}
-
-int BigramDictionary::getBigramFreq(int *pos) {
-    int freq = DICT[(*pos)++] & FLAG_BIGRAM_FREQ;
-
-    return freq;
-}
-
-
+/* Parameters :
+ * prevWord: the word before, the one for which we need to look up bigrams.
+ * prevWordLength: its length.
+ * codes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
+ * codesSize: the size of the codes array.
+ * bigramChars: an array for output, at the same format as outwords for getSuggestions.
+ * bigramFreq: an array to output frequencies.
+ * maxWordLength: the maximum size of a word.
+ * maxBigrams: the maximum number of bigrams fitting in the bigramChars array.
+ * maxAlteratives: unused.
+ * This method returns the number of bigrams this word has, for backward compatibility.
+ * Note: this is not the number of bigrams output in the array, which is the number of
+ * bigrams this word has WHOSE first letter also matches the letter the user typed.
+ * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
+ * used to match the first letter of the second word, but once the user has typed more
+ * and the bigrams are used to boost unigram result scores, it makes little sense to
+ * reduce their scope to the ones that match the first letter.
+ */
 int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes,
         int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
         int maxBigrams, int maxAlternatives) {
+    // TODO: remove unused arguments, and refrain from storing stuff in members of this class
+    // TODO: have "in" arguments before "out" ones, and make out args explicit in the name
     mBigramFreq = bigramFreq;
     mBigramChars = bigramChars;
     mInputCodes = codes;
-    mInputLength = codesSize;
     mMaxBigrams = maxBigrams;
 
-    if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) {
-        int pos = mParentDictionary->getBigramPosition(prevWord, prevWordLength);
-        if (DEBUG_DICT) {
-            LOGI("Pos -> %d", pos);
-        }
-        if (pos < 0) {
-            return 0;
-        }
-
-        int bigramCount = 0;
-        int bigramExist = (DICT[pos] & FLAG_BIGRAM_READ);
-        if (bigramExist > 0) {
-            int nextBigramExist = 1;
-            while (nextBigramExist > 0 && bigramCount < maxBigrams) {
-                int bigramAddress = getBigramAddress(&pos, true);
-                int frequency = (FLAG_BIGRAM_FREQ & DICT[pos]);
-                // search for all bigrams and store them
-                searchForTerminalNode(bigramAddress, frequency);
-                nextBigramExist = (DICT[pos++] & FLAG_BIGRAM_CONTINUED);
-                bigramCount++;
-            }
-        }
+    const uint8_t* const root = DICT;
+    int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
 
-        return bigramCount;
+    if (NOT_VALID_WORD == pos) return 0;
+    const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+    if (0 == (flags & UnigramDictionary::FLAG_HAS_BIGRAMS)) return 0;
+    if (0 == (flags & UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS)) {
+        BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+    } else {
+        pos = BinaryFormat::skipOtherCharacters(root, pos);
     }
-    return 0;
-}
-
-void BigramDictionary::searchForTerminalNode(int addressLookingFor, int frequency) {
-    // track word with such address and store it in an array
-    unsigned short word[MAX_WORD_LENGTH];
-
-    int pos;
-    int followDownBranchAddress = DICTIONARY_HEADER_SIZE;
-    bool found = false;
-    char followingChar = ' ';
-    int depth = -1;
-
-    while(!found) {
-        bool followDownAddressSearchStop = false;
-        bool firstAddress = true;
-        bool haveToSearchAll = true;
-
-        if (depth < MAX_WORD_LENGTH && depth >= 0) {
-            word[depth] = (unsigned short) followingChar;
-        }
-        pos = followDownBranchAddress; // pos start at count
-        int count = DICT[pos] & 0xFF;
-        if (DEBUG_DICT) {
-            LOGI("count - %d",count);
-        }
-        pos++;
-        for (int i = 0; i < count; i++) {
-            // pos at data
-            pos++;
-            // pos now at flag
-            if (!getFirstBitOfByte(&pos)) { // non-terminal
-                if (!followDownAddressSearchStop) {
-                    int addr = getBigramAddress(&pos, false);
-                    if (addr > addressLookingFor) {
-                        followDownAddressSearchStop = true;
-                        if (firstAddress) {
-                            firstAddress = false;
-                            haveToSearchAll = true;
-                        } else if (!haveToSearchAll) {
-                            break;
-                        }
-                    } else {
-                        followDownBranchAddress = addr;
-                        followingChar = (char)(0xFF & DICT[pos-1]);
-                        if (firstAddress) {
-                            firstAddress = false;
-                            haveToSearchAll = false;
-                        }
-                    }
-                }
-                pos += 3;
-            } else if (getFirstBitOfByte(&pos)) { // terminal
-                if (addressLookingFor == (pos-1)) { // found !!
-                    depth++;
-                    word[depth] = (0xFF & DICT[pos-1]);
-                    found = true;
-                    break;
-                }
-                if (getSecondBitOfByte(&pos)) { // address + freq (4 byte)
-                    if (!followDownAddressSearchStop) {
-                        int addr = getBigramAddress(&pos, false);
-                        if (addr > addressLookingFor) {
-                            followDownAddressSearchStop = true;
-                            if (firstAddress) {
-                                firstAddress = false;
-                                haveToSearchAll = true;
-                            } else if (!haveToSearchAll) {
-                                break;
-                            }
-                        } else {
-                            followDownBranchAddress = addr;
-                            followingChar = (char)(0xFF & DICT[pos-1]);
-                            if (firstAddress) {
-                                firstAddress = false;
-                                haveToSearchAll = true;
-                            }
-                        }
-                    }
-                    pos += 4;
-                } else { // freq only (2 byte)
-                    pos += 2;
-                }
-
-                // skipping bigram
-                int bigramExist = (DICT[pos] & FLAG_BIGRAM_READ);
-                if (bigramExist > 0) {
-                    int nextBigramExist = 1;
-                    while (nextBigramExist > 0) {
-                        pos += 3;
-                        nextBigramExist = (DICT[pos++] & FLAG_BIGRAM_CONTINUED);
-                    }
-                } else {
-                    pos++;
-                }
-            }
+    pos = BinaryFormat::skipChildrenPosition(flags, pos);
+    pos = BinaryFormat::skipFrequency(flags, pos);
+    int bigramFlags;
+    int bigramCount = 0;
+    do {
+        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+        uint16_t bigramBuffer[MAX_WORD_LENGTH];
+        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
+                &pos);
+        const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
+                bigramBuffer);
+
+        if (checkFirstCharacter(bigramBuffer)) {
+            const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+            addWordBigram(bigramBuffer, length, frequency);
         }
-        depth++;
-        if (followDownBranchAddress == 0) {
-            if (DEBUG_DICT) {
-                LOGI("ERROR!!! Cannot find bigram!!");
-            }
-            break;
-        }
-    }
-    if (checkFirstCharacter(word)) {
-        addWordBigram(word, depth, frequency);
-    }
+        ++bigramCount;
+    } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+    return bigramCount;
 }
 
 bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index e9f108e25..6f65088db 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -17,6 +17,8 @@
 #ifndef LATINIME_BINARY_FORMAT_H
 #define LATINIME_BINARY_FORMAT_H
 
+#include "unigram_dictionary.h"
+
 namespace latinime {
 
 class BinaryFormat {
@@ -26,6 +28,11 @@ private:
     const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
 
 public:
+    const static int UNKNOWN_FORMAT = -1;
+    const static int FORMAT_VERSION_1 = 1;
+    const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1;
+
+    static int detectFormat(const uint8_t* const dict);
     static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos);
     static uint8_t getFlagsAndForwardPointer(const uint8_t* const dict, int* pos);
     static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos);
@@ -41,8 +48,18 @@ public:
     static bool hasChildrenInFlags(const uint8_t flags);
     static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
             int *pos);
+    static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord,
+            const int length);
+    static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
+            uint16_t* outWord);
 };
 
+inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
+    const uint16_t magicNumber = (dict[0] << 8) + dict[1]; // big endian
+    if (FORMAT_VERSION_1_MAGIC_NUMBER == magicNumber) return FORMAT_VERSION_1;
+    return UNKNOWN_FORMAT;
+}
+
 inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
     return dict[(*pos)++];
 }
@@ -204,6 +221,222 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con
     }
 }
 
+// This function gets the byte position of the last chargroup of the exact matching word in the
+// dictionary. If no match is found, it returns NOT_VALID_WORD.
+inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
+        const uint16_t* const inWord, const int length) {
+    int pos = 0;
+    int wordPos = 0;
+
+    while (true) {
+        // If we already traversed the tree further than the word is long, there means
+        // there was no match (or we would have found it).
+        if (wordPos > length) return NOT_VALID_WORD;
+        int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
+        const uint16_t wChar = inWord[wordPos];
+        while (true) {
+            // If there are no more character groups in this node, it means we could not
+            // find a matching character for this depth, therefore there is no match.
+            if (0 >= charGroupCount) return NOT_VALID_WORD;
+            const int charGroupPos = pos;
+            const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+            int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+            if (character == wChar) {
+                // This is the correct node. Only one character group may start with the same
+                // char within a node, so either we found our match in this node, or there is
+                // no match and we can return NOT_VALID_WORD. So we will check all the characters
+                // in this character group indeed does match.
+                if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                    character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+                    while (NOT_A_CHARACTER != character) {
+                        ++wordPos;
+                        // If we shoot the length of the word we search for, or if we find a single
+                        // character that does not match, as explained above, it means the word is
+                        // not in the dictionary (by virtue of this chargroup being the only one to
+                        // match the word on the first character, but not matching the whole word).
+                        if (wordPos > length) return NOT_VALID_WORD;
+                        if (inWord[wordPos] != character) return NOT_VALID_WORD;
+                        character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
+                    }
+                }
+                // If we come here we know that so far, we do match. Either we are on a terminal
+                // and we match the length, in which case we found it, or we traverse children.
+                // If we don't match the length AND don't have children, then a word in the
+                // dictionary fully matches a prefix of the searched word but not the full word.
+                ++wordPos;
+                if (UnigramDictionary::FLAG_IS_TERMINAL & flags) {
+                    if (wordPos == length) {
+                        return charGroupPos;
+                    }
+                    pos = BinaryFormat::skipFrequency(UnigramDictionary::FLAG_IS_TERMINAL, pos);
+                }
+                if (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
+                        == (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)) {
+                    return NOT_VALID_WORD;
+                }
+                // We have children and we are still shorter than the word we are searching for, so
+                // we need to traverse children. Put the pointer on the children position, and
+                // break
+                pos = BinaryFormat::readChildrenPosition(root, flags, pos);
+                break;
+            } else {
+                // This chargroup does not match, so skip the remaining part and go to the next.
+                if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                    pos = BinaryFormat::skipOtherCharacters(root, pos);
+                }
+                pos = BinaryFormat::skipFrequency(flags, pos);
+                pos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
+            }
+            --charGroupCount;
+        }
+    }
+}
+
+// This function searches for a terminal in the dictionary by its address.
+// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
+// it is possible to check for this with advantageous complexity. For each node, we search
+// for groups with children and compare the children address with the address we look for.
+// When we shoot the address we look for, it means the word we look for is in the children
+// of the previous group. The only tricky part is the fact that if we arrive at the end of a
+// node with the last group's children address still less than what we are searching for, we
+// must descend the last group's children (for example, if the word we are searching for starts
+// with a z, it's the last group of the root node, so all children addresses will be smaller
+// than the address we look for, and we have to descend the z node).
+/* Parameters :
+ * root: the dictionary buffer
+ * address: the byte position of the last chargroup of the word we are searching for (this is
+ *   what is stored as the "bigram address" in each bigram)
+ * outword: an array to write the found word, with MAX_WORD_LENGTH size.
+ * Return value : the length of the word, of 0 if the word was not found.
+ */
+inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address,
+        const int maxDepth, uint16_t* outWord) {
+    int pos = 0;
+    int wordPos = 0;
+
+    // One iteration of the outer loop iterates through nodes. As stated above, we will only
+    // traverse nodes that are actually a part of the terminal we are searching, so each time
+    // we enter this loop we are one depth level further than last time.
+    // The only reason we count nodes is because we want to reduce the probability of infinite
+    // looping in case there is a bug. Since we know there is an upper bound to the depth we are
+    // supposed to traverse, it does not hurt to count iterations.
+    for (int loopCount = maxDepth; loopCount > 0; --loopCount) {
+        int lastCandidateGroupPos = 0;
+        // Let's loop through char groups in this node searching for either the terminal
+        // or one of its ascendants.
+        for (int charGroupCount = getGroupCountAndForwardPointer(root, &pos); charGroupCount > 0;
+                 --charGroupCount) {
+            const int startPos = pos;
+            const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
+            const int32_t character = getCharCodeAndForwardPointer(root, &pos);
+            if (address == startPos) {
+                // We found the address. Copy the rest of the word in the buffer and return
+                // the length.
+                outWord[wordPos] = character;
+                if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                    int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
+                    // We count chars in order to avoid infinite loops if the file is broken or
+                    // if there is some other bug
+                    int charCount = maxDepth;
+                    while (-1 != nextChar && --charCount > 0) {
+                        outWord[++wordPos] = nextChar;
+                        nextChar = getCharCodeAndForwardPointer(root, &pos);
+                    }
+                }
+                return ++wordPos;
+            }
+            // We need to skip past this char group, so skip any remaining chars after the
+            // first and possibly the frequency.
+            if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags) {
+                pos = skipOtherCharacters(root, pos);
+            }
+            pos = skipFrequency(flags, pos);
+
+            // The fact that this group has children is very important. Since we already know
+            // that this group does not match, if it has no children we know it is irrelevant
+            // to what we are searching for.
+            const bool hasChildren = (UnigramDictionary::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS !=
+                    (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags));
+            // We will write in `found' whether we have passed the children address we are
+            // searching for. For example if we search for "beer", the children of b are less
+            // than the address we are searching for and the children of c are greater. When we
+            // come here for c, we realize this is too big, and that we should descend b.
+            bool found;
+            if (hasChildren) {
+                // Here comes the tricky part. First, read the children position.
+                const int childrenPos = readChildrenPosition(root, flags, pos);
+                if (childrenPos > address) {
+                    // If the children pos is greater than address, it means the previous chargroup,
+                    // which address is stored in lastCandidateGroupPos, was the right one.
+                    found = true;
+                } else if (1 >= charGroupCount) {
+                    // However if we are on the LAST group of this node, and we have NOT shot the
+                    // address we should descend THIS node. So we trick the lastCandidateGroupPos
+                    // so that we will descend this node, not the previous one.
+                    lastCandidateGroupPos = startPos;
+                    found = true;
+                } else {
+                    // Else, we should continue looking.
+                    found = false;
+                }
+            } else {
+                // Even if we don't have children here, we could still be on the last group of this
+                // node. If this is the case, we should descend the last group that had children,
+                // and their address is already in lastCandidateGroup.
+                found = (1 >= charGroupCount);
+            }
+
+            if (found) {
+                // Okay, we found the group we should descend. Its address is in
+                // the lastCandidateGroupPos variable, so we just re-read it.
+                if (0 != lastCandidateGroupPos) {
+                    const uint8_t lastFlags =
+                            getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
+                    const int32_t lastChar =
+                            getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
+                    // We copy all the characters in this group to the buffer
+                    outWord[wordPos] = lastChar;
+                    if (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
+                        int32_t nextChar =
+                                getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
+                        int charCount = maxDepth;
+                        while (-1 != nextChar && --charCount > 0) {
+                            outWord[++wordPos] = nextChar;
+                            nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
+                        }
+                    }
+                    ++wordPos;
+                    // Now we only need to branch to the children address. Skip the frequency if
+                    // it's there, read pos, and break to resume the search at pos.
+                    lastCandidateGroupPos = skipFrequency(lastFlags, lastCandidateGroupPos);
+                    pos = readChildrenPosition(root, lastFlags, lastCandidateGroupPos);
+                    break;
+                } else {
+                    // Here is a little tricky part: we come here if we found out that all children
+                    // addresses in this group are bigger than the address we are searching for.
+                    // Should we conclude the word is not in the dictionary? No! It could still be
+                    // one of the remaining chargroups in this node, so we have to keep looking in
+                    // this node until we find it (or we realize it's not there either, in which
+                    // case it's actually not in the dictionary). Pass the end of this group, ready
+                    // to start the next one.
+                    pos = skipChildrenPosAndAttributes(root, flags, pos);
+                }
+            } else {
+                // If we did not find it, we should record the last children address for the next
+                // iteration.
+                if (hasChildren) lastCandidateGroupPos = startPos;
+                // Now skip the end of this group (children pos and the attributes if any) so that
+                // our pos is after the end of this char group, at the start of the next one.
+                pos = skipChildrenPosAndAttributes(root, flags, pos);
+            }
+
+        }
+    }
+    // If we have looked through all the chargroups and found no match, the address is
+    // not the address of a terminal in this dictionary.
+    return 0;
+}
+
 } // namespace latinime
 
 #endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
new file mode 100644
index 000000000..654d4715f
--- /dev/null
+++ b/native/src/correction.cpp
@@ -0,0 +1,492 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#define LOG_TAG "LatinIME: correction.cpp"
+
+#include "correction.h"
+#include "proximity_info.h"
+
+namespace latinime {
+
+//////////////////////
+// inline functions //
+//////////////////////
+static const char QUOTE = '\'';
+
+inline bool Correction::isQuote(const unsigned short c) {
+    const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
+    return (c == QUOTE && userTypedChar != QUOTE);
+}
+
+////////////////
+// Correction //
+////////////////
+
+Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier)
+        : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
+}
+
+void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
+        const int maxDepth) {
+    mProximityInfo = pi;
+    mInputLength = inputLength;
+    mMaxDepth = maxDepth;
+    mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+}
+
+void Correction::initCorrectionState(
+        const int rootPos, const int childCount, const bool traverseAll) {
+    latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
+}
+
+void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
+        const int transposedPos, const int spaceProximityPos, const int missingSpacePos) {
+    mSkipPos = skipPos;
+    mExcessivePos = excessivePos;
+    mTransposedPos = transposedPos;
+    mSpaceProximityPos = spaceProximityPos;
+    mMissingSpacePos = missingSpacePos;
+}
+
+void Correction::checkState() {
+    if (DEBUG_DICT) {
+        int inputCount = 0;
+        if (mSkipPos >= 0) ++inputCount;
+        if (mExcessivePos >= 0) ++inputCount;
+        if (mTransposedPos >= 0) ++inputCount;
+        // TODO: remove this assert
+        assert(inputCount <= 1);
+    }
+}
+
+int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) {
+    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
+}
+
+int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
+    const int outputIndex = mTerminalOutputIndex;
+    const int inputIndex = mTerminalInputIndex;
+    *wordLength = outputIndex + 1;
+    if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
+        return -1;
+    }
+
+    // TODO: Remove this
+    if (mSkipPos >= 0 && mSkippedCount <= 0) {
+        return -1;
+    }
+
+    *word = mWord;
+    const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
+            : (mInputLength == inputIndex + 1);
+    return Correction::RankingAlgorithm::calculateFinalFreq(
+            inputIndex, outputIndex, mMatchedCharCount, freq, sameLength, this);
+}
+
+bool Correction::initProcessState(const int outputIndex) {
+    if (mCorrectionStates[outputIndex].mChildCount <= 0) {
+        return false;
+    }
+    mOutputIndex = outputIndex;
+    --(mCorrectionStates[outputIndex].mChildCount);
+    mMatchedCharCount = mCorrectionStates[outputIndex].mMatchedCount;
+    mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
+    mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
+    mDiffs = mCorrectionStates[outputIndex].mDiffs;
+    mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
+    mSkipping = false;
+    mMatching = false;
+    return true;
+}
+
+int Correction::goDownTree(
+        const int parentIndex, const int childCount, const int firstChildPos) {
+    mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
+    mCorrectionStates[mOutputIndex].mChildCount = childCount;
+    mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
+    return mOutputIndex;
+}
+
+void Correction::charMatched() {
+    ++mMatchedCharCount;
+}
+
+// TODO: remove
+int Correction::getOutputIndex() {
+    return mOutputIndex;
+}
+
+// TODO: remove
+int Correction::getInputIndex() {
+    return mInputIndex;
+}
+
+// TODO: remove
+bool Correction::needsToTraverseAllNodes() {
+    return mNeedsToTraverseAllNodes;
+}
+
+void Correction::incrementInputIndex() {
+    ++mInputIndex;
+}
+
+void Correction::incrementOutputIndex() {
+    ++mOutputIndex;
+    mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
+    mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
+    mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
+    mCorrectionStates[mOutputIndex].mMatchedCount = mMatchedCharCount;
+    mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
+    mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
+    mCorrectionStates[mOutputIndex].mDiffs = mDiffs;
+    mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
+    mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
+    mCorrectionStates[mOutputIndex].mMatching = mMatching;
+}
+
+void Correction::startToTraverseAllNodes() {
+    mNeedsToTraverseAllNodes = true;
+}
+
+bool Correction::needsToPrune() const {
+    return (mOutputIndex - 1 >= (mTransposedPos >= 0 ? mInputLength - 1 : mMaxDepth)
+            || mDiffs > mMaxEditDistance);
+}
+
+Correction::CorrectionType Correction::processSkipChar(
+        const int32_t c, const bool isTerminal) {
+    mWord[mOutputIndex] = c;
+    if (needsToTraverseAllNodes() && isTerminal) {
+        mTerminalInputIndex = mInputIndex;
+        mTerminalOutputIndex = mOutputIndex;
+        incrementOutputIndex();
+        return TRAVERSE_ALL_ON_TERMINAL;
+    } else {
+        incrementOutputIndex();
+        return TRAVERSE_ALL_NOT_ON_TERMINAL;
+    }
+}
+
+Correction::CorrectionType Correction::processCharAndCalcState(
+        const int32_t c, const bool isTerminal) {
+    CorrectionType currentStateType = NOT_ON_TERMINAL;
+    // This has to be done for each virtual char (this forwards the "inputIndex" which
+    // is the index in the user-inputted chars, as read by proximity chars.
+    if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
+        incrementInputIndex();
+    }
+
+    bool skip = false;
+    if (mSkipPos >= 0) {
+        skip = mSkipPos == mOutputIndex;
+        mSkipping = true;
+    }
+
+    if (mNeedsToTraverseAllNodes || isQuote(c)) {
+        return processSkipChar(c, isTerminal);
+    } else {
+        int inputIndexForProximity = mInputIndex;
+
+        if (mTransposedPos >= 0) {
+            if (mInputIndex == mTransposedPos) {
+                ++inputIndexForProximity;
+            }
+            if (mInputIndex == (mTransposedPos + 1)) {
+                --inputIndexForProximity;
+            }
+        }
+
+        const bool checkProximityChars =
+                !(mSkipPos >= 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
+        int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
+                inputIndexForProximity, c, checkProximityChars);
+
+        const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
+        if (unrelated) {
+            if (skip) {
+                // Skip this letter and continue deeper
+                ++mSkippedCount;
+                return processSkipChar(c, isTerminal);
+            } else {
+                return UNRELATED;
+            }
+        }
+
+        // TODO: remove after allowing combination errors
+        if (skip) {
+            return UNRELATED;
+        }
+
+        mWord[mOutputIndex] = c;
+        // If inputIndex is greater than mInputLength, that means there is no
+        // proximity chars. So, we don't need to check proximity.
+        if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
+            mMatching = true;
+            charMatched();
+        }
+
+        if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+            incrementDiffs();
+        }
+
+        const bool isSameAsUserTypedLength = mInputLength
+                == getInputIndex() + 1
+                        || (mExcessivePos == mInputLength - 1
+                                    && getInputIndex() == mInputLength - 2);
+        if (isSameAsUserTypedLength && isTerminal) {
+            mTerminalInputIndex = mInputIndex;
+            mTerminalOutputIndex = mOutputIndex;
+            currentStateType = ON_TERMINAL;
+        }
+        // Start traversing all nodes after the index exceeds the user typed length
+        if (isSameAsUserTypedLength) {
+            startToTraverseAllNodes();
+        }
+
+        // Finally, we are ready to go to the next character, the next "virtual node".
+        // We should advance the input index.
+        // We do this in this branch of the 'if traverseAllNodes' because we are still matching
+        // characters to input; the other branch is not matching them but searching for
+        // completions, this is why it does not have to do it.
+        incrementInputIndex();
+    }
+
+    // Also, the next char is one "virtual node" depth more than this char.
+    incrementOutputIndex();
+
+    return currentStateType;
+}
+
+Correction::~Correction() {
+}
+
+/////////////////////////
+// static inline utils //
+/////////////////////////
+
+static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
+static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
+    return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
+}
+
+static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
+inline static void multiplyIntCapped(const int multiplier, int *base) {
+    const int temp = *base;
+    if (temp != S_INT_MAX) {
+        // Branch if multiplier == 2 for the optimization
+        if (multiplier == 2) {
+            *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
+        } else {
+            const int tempRetval = temp * multiplier;
+            *base = tempRetval >= temp ? tempRetval : S_INT_MAX;
+        }
+    }
+}
+
+inline static int powerIntCapped(const int base, const int n) {
+    if (n == 0) return 1;
+    if (base == 2) {
+        return n < 31 ? 1 << n : S_INT_MAX;
+    } else {
+        int ret = base;
+        for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
+        return ret;
+    }
+}
+
+inline static void multiplyRate(const int rate, int *freq) {
+    if (*freq != S_INT_MAX) {
+        if (*freq > 1000000) {
+            *freq /= 100;
+            multiplyIntCapped(rate, freq);
+        } else {
+            multiplyIntCapped(rate, freq);
+            *freq /= 100;
+        }
+    }
+}
+
+//////////////////////
+// RankingAlgorithm //
+//////////////////////
+
+/* static */
+int Correction::RankingAlgorithm::calculateFinalFreq(
+        const int inputIndex, const int outputIndex,
+        const int matchCount, const int freq, const bool sameLength,
+        const Correction* correction) {
+    const int skipPos = correction->getSkipPos();
+    const int excessivePos = correction->getExcessivePos();
+    const int transposedPos = correction->getTransposedPos();
+    const int inputLength = correction->mInputLength;
+    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+    const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
+    const ProximityInfo *proximityInfo = correction->mProximityInfo;
+    const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
+    const unsigned short* word = correction->mWord;
+    const int skippedCount = correction->mSkippedCount;
+
+    // TODO: Demote by edit distance
+    int finalFreq = freq * matchWeight;
+    if (skipPos >= 0) {
+        if (inputLength >= 2) {
+            const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
+                    * (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
+                    / (10 * inputLength
+                            - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
+            if (DEBUG_DICT_FULL) {
+                LOGI("Demotion rate for missing character is %d.", demotionRate);
+            }
+            multiplyRate(demotionRate, &finalFreq);
+        } else {
+            finalFreq = 0;
+        }
+    }
+    if (transposedPos >= 0) multiplyRate(
+            WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
+    if (excessivePos >= 0) {
+        multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
+        if (!proximityInfo->existsAdjacentProximityChars(inputIndex)) {
+            // If an excessive character is not adjacent to the left char or the right char,
+            // we will demote this word.
+            multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
+        }
+    }
+    int lengthFreq = typedLetterMultiplier;
+    multiplyIntCapped(powerIntCapped(typedLetterMultiplier, outputIndex), &lengthFreq);
+    if ((outputIndex + 1) == matchCount) {
+        // Full exact match
+        if (outputIndex > 1) {
+            if (DEBUG_DICT) {
+                LOGI("Found full matched word.");
+            }
+            multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
+        }
+        if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
+            finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
+        }
+    } else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0
+            && outputIndex > 0) {
+        // A word with proximity corrections
+        if (DEBUG_DICT) {
+            LOGI("Found one proximity correction.");
+        }
+        multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+        multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
+    }
+    if (DEBUG_DICT) {
+        LOGI("calc: %d, %d", outputIndex, sameLength);
+    }
+    if (sameLength) multiplyIntCapped(fullWordMultiplier, &finalFreq);
+
+    // TODO: check excessive count and transposed count
+    /*
+     If the last character of the user input word is the same as the next character
+     of the output word, and also all of characters of the user input are matched
+     to the output word, we'll promote that word a bit because
+     that word can be considered the combination of skipped and matched characters.
+     This means that the 'sm' pattern wins over the 'ma' pattern.
+     e.g.)
+     shel -> shell [mmmma] or [mmmsm]
+     hel -> hello [mmmaa] or [mmsma]
+     m ... matching
+     s ... skipping
+     a ... traversing all
+     */
+    if (matchCount == inputLength && matchCount >= 2 && skippedCount == 0
+            && word[matchCount] == word[matchCount - 1]) {
+        multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
+    }
+
+    return finalFreq;
+}
+
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
+        const int firstFreq, const int secondFreq, const Correction* correction) {
+    const int spaceProximityPos = correction->mSpaceProximityPos;
+    const int missingSpacePos = correction->mMissingSpacePos;
+    if (DEBUG_DICT) {
+        int inputCount = 0;
+        if (spaceProximityPos >= 0) ++inputCount;
+        if (missingSpacePos >= 0) ++inputCount;
+        assert(inputCount <= 1);
+    }
+    const bool isSpaceProximity = spaceProximityPos >= 0;
+    const int inputLength = correction->mInputLength;
+    const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+    const int secondWordLength = isSpaceProximity
+            ? (inputLength - spaceProximityPos - 1)
+            : (inputLength - missingSpacePos);
+    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+    if (firstWordLength == 0 || secondWordLength == 0) {
+        return 0;
+    }
+    const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
+    int tempFirstFreq = firstFreq;
+    multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+    const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
+    int tempSecondFreq = secondFreq;
+    multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+    const int totalLength = firstWordLength + secondWordLength;
+
+    // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+    // length.
+    int totalFreq = tempFirstFreq + tempSecondFreq;
+
+    // This is a workaround to try offsetting the not-enough-demotion which will be done in
+    // calcNormalizedScore in Utils.java.
+    // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+    // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+    // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+    const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+    multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+    // At this moment, totalFreq is calculated by the following formula:
+    // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+    //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+    multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+    // This is another workaround to offset the demotion which will be done in
+    // calcNormalizedScore in Utils.java.
+    // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+    // the same amount because we already have adjusted the synthetic freq of this "missing or
+    // mistyped space" suggestion candidate above in this method.
+    const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+    multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+    if (isSpaceProximity) {
+        // A word pair with one space proximity correction
+        if (DEBUG_DICT) {
+            LOGI("Found a word pair with space proximity correction.");
+        }
+        multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+        multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+    }
+
+    multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+    return totalFreq;
+}
+
+} // namespace latinime
diff --git a/native/src/correction.h b/native/src/correction.h
new file mode 100644
index 000000000..62fe38696
--- /dev/null
+++ b/native/src/correction.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_CORRECTION_H
+#define LATINIME_CORRECTION_H
+
+#include <stdint.h>
+#include "correction_state.h"
+
+#include "defines.h"
+
+namespace latinime {
+
+class ProximityInfo;
+
+class Correction {
+
+public:
+    typedef enum {
+        TRAVERSE_ALL_ON_TERMINAL,
+        TRAVERSE_ALL_NOT_ON_TERMINAL,
+        UNRELATED,
+        ON_TERMINAL,
+        NOT_ON_TERMINAL
+    } CorrectionType;
+
+    Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
+    void initCorrection(
+            const ProximityInfo *pi, const int inputLength, const int maxWordLength);
+    void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
+
+    // TODO: remove
+    void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
+            const int spaceProximityPos, const int missingSpacePos);
+    void checkState();
+    bool initProcessState(const int index);
+
+    void getProcessState(int *matchedCount, int *inputIndex, int *outputIndex,
+            bool *traverseAllNodes, int *diffs);
+    int getOutputIndex();
+    int getInputIndex();
+
+    virtual ~Correction();
+    int getSpaceProximityPos() const {
+        return mSpaceProximityPos;
+    }
+    int getMissingSpacePos() const {
+        return mMissingSpacePos;
+    }
+
+    int getSkipPos() const {
+        return mSkipPos;
+    }
+
+    int getExcessivePos() const {
+        return mExcessivePos;
+    }
+
+    int getTransposedPos() const {
+        return mTransposedPos;
+    }
+
+    bool needsToPrune() const;
+
+    int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
+    int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
+
+    CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
+
+    int getDiffs() const {
+        return mDiffs;
+    }
+
+    /////////////////////////
+    // Tree helper methods
+    int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
+
+    inline int getTreeSiblingPos(const int index) const {
+        return mCorrectionStates[index].mSiblingPos;
+    }
+
+    inline void setTreeSiblingPos(const int index, const int pos) {
+        mCorrectionStates[index].mSiblingPos = pos;
+    }
+
+    inline int getTreeParentIndex(const int index) const {
+        return mCorrectionStates[index].mParentIndex;
+    }
+private:
+    inline void charMatched();
+    inline void incrementInputIndex();
+    inline void incrementOutputIndex();
+    inline bool needsToTraverseAllNodes();
+    inline void startToTraverseAllNodes();
+    inline bool isQuote(const unsigned short c);
+    inline CorrectionType processSkipChar(const int32_t c, const bool isTerminal);
+
+    // TODO: remove
+    inline void incrementDiffs() {
+        ++mDiffs;
+    }
+
+    const int TYPED_LETTER_MULTIPLIER;
+    const int FULL_WORD_MULTIPLIER;
+    const ProximityInfo *mProximityInfo;
+
+    int mMaxEditDistance;
+    int mMaxDepth;
+    int mInputLength;
+    int mSkipPos;
+    int mExcessivePos;
+    int mTransposedPos;
+    int mSpaceProximityPos;
+    int mMissingSpacePos;
+    int mTerminalInputIndex;
+    int mTerminalOutputIndex;
+    unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+
+    CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
+
+    // The following member variables are being used as cache values of the correction state.
+    int mOutputIndex;
+    int mInputIndex;
+    int mDiffs;
+    int mMatchedCharCount;
+    int mSkippedCount;
+    bool mNeedsToTraverseAllNodes;
+    bool mMatching;
+    bool mSkipping;
+
+    class RankingAlgorithm {
+    public:
+        static int calculateFinalFreq(const int inputIndex, const int depth,
+                const int matchCount, const int freq, const bool sameLength,
+                const Correction* correction);
+        static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+                const Correction* correction);
+    };
+};
+} // namespace latinime
+#endif // LATINIME_CORRECTION_H
diff --git a/native/src/correction_state.h b/native/src/correction_state.h
new file mode 100644
index 000000000..731222696
--- /dev/null
+++ b/native/src/correction_state.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_CORRECTION_STATE_H
+#define LATINIME_CORRECTION_STATE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+struct CorrectionState {
+    int mParentIndex;
+    int mSiblingPos;
+    uint16_t mChildCount;
+    uint8_t mInputIndex;
+    uint8_t mDiffs;
+    uint8_t mMatchedCount;
+    uint8_t mSkippedCount;
+    bool mMatching;
+    bool mSkipping;
+    bool mNeedsToTraverseAllNodes;
+
+};
+
+inline static void initCorrectionState(CorrectionState *state, const int rootPos,
+        const uint16_t childCount, const bool traverseAll) {
+    state->mParentIndex = -1;
+    state->mChildCount = childCount;
+    state->mInputIndex = 0;
+    state->mDiffs = 0;
+    state->mSiblingPos = rootPos;
+    state->mMatchedCount = 0;
+    state->mSkippedCount = 0;
+    state->mMatching = false;
+    state->mSkipping = false;
+    state->mNeedsToTraverseAllNodes = traverseAll;
+}
+
+} // namespace latinime
+#endif // LATINIME_CORRECTION_STATE_H
diff --git a/native/src/defines.h b/native/src/defines.h
index bea83b2c5..c1838d341 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -160,6 +160,7 @@ static void prof_out(void) {
 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
 #define FULL_MATCHED_WORDS_PROMOTION_RATE 120
 #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
+#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
 
 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
 // This is only used for the size of array. Not to be used in c functions.
@@ -176,9 +177,6 @@ static void prof_out(void) {
 #define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
 #define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
 
-// The size of next letters frequency array.  Zero will disable the feature.
-#define NEXT_LETTERS_SIZE 0
-
 #define min(a,b) ((a)<(b)?(a):(b))
 
 #endif // LATINIME_DEFINES_H
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index 9e32ee80f..a49769bdb 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -57,12 +57,4 @@ bool Dictionary::isValidWord(unsigned short *word, int length) {
     return mUnigramDictionary->isValidWord(word, length);
 }
 
-int Dictionary::getBigramPosition(unsigned short *word, int length) {
-    if (IS_LATEST_DICT_VERSION) {
-        return mUnigramDictionary->getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length);
-    } else {
-        return mUnigramDictionary->getBigramPosition(0, word, 0, length);
-    }
-}
-
 } // namespace latinime
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index 73e03d8fd..d5de0083a 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -64,8 +64,6 @@ public:
             const int pos, unsigned short *c, int *childrenPosition,
             bool *terminal, int *freq);
     static inline unsigned short toBaseLowerCase(unsigned short c);
-    // TODO: delete this
-    int getBigramPosition(unsigned short *word, int length);
 
 private:
     bool hasBigram();
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index c45393f18..d437e251a 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -78,7 +78,7 @@ unsigned short ProximityInfo::getPrimaryCharAt(const int index) const {
     return getProximityCharsAt(index)[0];
 }
 
-bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
+inline bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
     const int *chars = getProximityCharsAt(index);
     int i = 0;
     while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE) {
@@ -114,8 +114,7 @@ bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
 // in their list. The non-accented version of the character should be considered
 // "close", but not the other keys close to the non-accented version.
 ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
-        const int index, const unsigned short c, const int skipPos,
-        const int excessivePos, const int transposedPos) const {
+        const int index, const unsigned short c, const bool checkProximityChars) const {
     const int *currentChars = getProximityCharsAt(index);
     const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
 
@@ -124,9 +123,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
     if (currentChars[0] == baseLowerC || currentChars[0] == c)
         return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
 
-    // If one of those is true, we should not check for close characters at all.
-    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
-        return UNRELATED_CHAR;
+    if (!checkProximityChars) return UNRELATED_CHAR;
 
     // If the non-accented, lowercased version of that first character matches c,
     // then we have a non-accented version of the accented character the user
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 435a60151..d9ed46f5b 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -23,6 +23,8 @@
 
 namespace latinime {
 
+class Correction;
+
 class ProximityInfo {
 public:
     typedef enum {                             // Used as a return value for character comparison
@@ -42,9 +44,9 @@ public:
     bool existsCharInProximityAt(const int index, const int c) const;
     bool existsAdjacentProximityChars(const int index) const;
     ProximityType getMatchedProximityId(
-            const int index, const unsigned short c, const int skipPos,
-            const int excessivePos, const int transposedPos) const;
+            const int index, const unsigned short c, const bool checkProximityChars) const;
     bool sameAsTyped(const unsigned short *word, int length) const;
+
 private:
     int getStartIndexFromCoordinates(const int x, const int y) const;
     const int MAX_PROXIMITY_CHARS_SIZE;
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index afa8bc545..bbfaea454 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -24,9 +24,7 @@
 #include "dictionary.h"
 #include "unigram_dictionary.h"
 
-#ifdef NEW_DICTIONARY_FORMAT
 #include "binary_format.h"
-#endif // NEW_DICTIONARY_FORMAT
 
 namespace latinime {
 
@@ -39,28 +37,23 @@ const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =
 UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,
         int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
         const bool isLatestDictVersion)
-#ifndef NEW_DICTIONARY_FORMAT
-    : DICT_ROOT(streamStart),
-#else // NEW_DICTIONARY_FORMAT
     : DICT_ROOT(streamStart + NEW_DICTIONARY_HEADER_SIZE),
-#endif // NEW_DICTIONARY_FORMAT
     MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
     MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
     TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
-#ifndef NEW_DICTIONARY_FORMAT
-    ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),
-#else // NEW_DICTIONARY_FORMAT
       // TODO : remove this variable.
     ROOT_POS(0),
-#endif // NEW_DICTIONARY_FORMAT
     BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
     MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
     if (DEBUG_DICT) {
         LOGI("UnigramDictionary - constructor");
     }
+    mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
 }
 
-UnigramDictionary::~UnigramDictionary() {}
+UnigramDictionary::~UnigramDictionary() {
+    delete mCorrection;
+}
 
 static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
         const int MAX_PROXIMITY_CHARS) {
@@ -174,12 +167,6 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
             LOGI("%s %i", s, mFrequencies[j]);
 #endif
         }
-        LOGI("Next letters: ");
-        for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
-            if (mNextLettersFrequency[k] > 0) {
-                LOGI("%c = %d,", k, mNextLettersFrequency[k]);
-            }
-        }
     }
     PROF_END(20);
     PROF_CLOSE;
@@ -196,11 +183,12 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
             proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
     if (DEBUG_DICT) assert(codesSize == mInputLength);
 
-    const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
+    const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
+    mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
     PROF_END(0);
 
     PROF_START(1);
-    getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH);
+    getSuggestionCandidates(-1, -1, -1);
     PROF_END(1);
 
     PROF_START(2);
@@ -210,7 +198,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
             if (DEBUG_DICT) {
                 LOGI("--- Suggest missing characters %d", i);
             }
-            getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);
+            getSuggestionCandidates(i, -1, -1);
         }
     }
     PROF_END(2);
@@ -223,7 +211,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
             if (DEBUG_DICT) {
                 LOGI("--- Suggest excessive characters %d", i);
             }
-            getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);
+            getSuggestionCandidates(-1, i, -1);
         }
     }
     PROF_END(3);
@@ -236,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
             if (DEBUG_DICT) {
                 LOGI("--- Suggest transposed characters %d", i);
             }
-            getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);
+            getSuggestionCandidates(-1, -1, i);
         }
     }
     PROF_END(4);
@@ -249,7 +237,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
             if (DEBUG_DICT) {
                 LOGI("--- Suggest missing space characters %d", i);
             }
-            getMissingSpaceWords(mInputLength, i);
+            getMissingSpaceWords(mInputLength, i, mCorrection);
         }
     }
     PROF_END(5);
@@ -268,7 +256,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
                         i, x, y, proximityInfo->hasSpaceProximity(x, y));
             }
             if (proximityInfo->hasSpaceProximity(x, y)) {
-                getMistypedSpaceWords(mInputLength, i);
+                getMistypedSpaceWords(mInputLength, i, mCorrection);
             }
         }
     }
@@ -284,7 +272,6 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
     mFrequencies = frequencies;
     mOutputChars = outWords;
     mInputLength = codesSize;
-    mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
     proximityInfo->setInputParams(codes, codesSize);
     mProximityInfo = proximityInfo;
 }
@@ -354,70 +341,43 @@ static const char QUOTE = '\'';
 static const char SPACE = ' ';
 
 void UnigramDictionary::getSuggestionCandidates(const int skipPos,
-        const int excessivePos, const int transposedPos, int *nextLetters,
-        const int nextLettersSize, const int maxDepth) {
+        const int excessivePos, const int transposedPos) {
     if (DEBUG_DICT) {
-        LOGI("getSuggestionCandidates %d", maxDepth);
         assert(transposedPos + 1 < mInputLength);
         assert(excessivePos < mInputLength);
         assert(missingPos < mInputLength);
     }
+    mCorrection->setCorrectionParams(skipPos, excessivePos, transposedPos,
+            -1 /* spaceProximityPos */, -1 /* missingSpacePos */);
     int rootPosition = ROOT_POS;
     // Get the number of children of root, then increment the position
     int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
-    int depth = 0;
+    int outputIndex = 0;
 
-    mStackChildCount[0] = childCount;
-    mStackTraverseAll[0] = (mInputLength <= 0);
-    mStackNodeFreq[0] = 1;
-    mStackInputIndex[0] = 0;
-    mStackDiffs[0] = 0;
-    mStackSiblingPos[0] = rootPosition;
-    mStackOutputIndex[0] = 0;
+    mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0));
 
     // Depth first search
-    while (depth >= 0) {
-        if (mStackChildCount[depth] > 0) {
-            --mStackChildCount[depth];
-            bool traverseAllNodes = mStackTraverseAll[depth];
-            int matchWeight = mStackNodeFreq[depth];
-            int inputIndex = mStackInputIndex[depth];
-            int diffs = mStackDiffs[depth];
-            int siblingPos = mStackSiblingPos[depth];
-            int outputIndex = mStackOutputIndex[depth];
+    while (outputIndex >= 0) {
+        if (mCorrection->initProcessState(outputIndex)) {
+            int siblingPos = mCorrection->getTreeSiblingPos(outputIndex);
             int firstChildPos;
-            // depth will never be greater than maxDepth because in that case,
-            // needsToTraverseChildrenNodes should be false
-            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
-                    maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
-                    excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
-                    &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
-                    &siblingPos, &outputIndex);
+
+            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
+                    mCorrection, &childCount, &firstChildPos, &siblingPos);
             // Update next sibling pos
-            mStackSiblingPos[depth] = siblingPos;
+            mCorrection->setTreeSiblingPos(outputIndex, siblingPos);
+
             if (needsToTraverseChildrenNodes) {
                 // Goes to child node
-                ++depth;
-                mStackChildCount[depth] = childCount;
-                mStackTraverseAll[depth] = traverseAllNodes;
-                mStackNodeFreq[depth] = matchWeight;
-                mStackInputIndex[depth] = inputIndex;
-                mStackDiffs[depth] = diffs;
-                mStackSiblingPos[depth] = firstChildPos;
-                mStackOutputIndex[depth] = outputIndex;
+                outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos);
             }
         } else {
             // Goes to parent sibling node
-            --depth;
+            outputIndex = mCorrection->getTreeParentIndex(outputIndex);
         }
     }
 }
 
-static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
-static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
-    return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
-}
-
 static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
 inline static void multiplyIntCapped(const int multiplier, int *base) {
     const int temp = *base;
@@ -432,149 +392,18 @@ inline static void multiplyIntCapped(const int multiplier, int *base) {
     }
 }
 
-inline static int powerIntCapped(const int base, const int n) {
-    if (base == 2) {
-        return n < 31 ? 1 << n : S_INT_MAX;
-    } else {
-        int ret = base;
-        for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
-        return ret;
-    }
+void UnigramDictionary::getMissingSpaceWords(
+        const int inputLength, const int missingSpacePos, Correction *correction) {
+    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+            -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos);
+    getSplitTwoWordsSuggestion(inputLength, correction);
 }
 
-inline static void multiplyRate(const int rate, int *freq) {
-    if (*freq != S_INT_MAX) {
-        if (*freq > 1000000) {
-            *freq /= 100;
-            multiplyIntCapped(rate, freq);
-        } else {
-            multiplyIntCapped(rate, freq);
-            *freq /= 100;
-        }
-    }
-}
-
-inline static int calcFreqForSplitTwoWords(
-        const int typedLetterMultiplier, const int firstWordLength, const int secondWordLength,
-        const int firstFreq, const int secondFreq, const bool isSpaceProximity) {
-    if (firstWordLength == 0 || secondWordLength == 0) {
-        return 0;
-    }
-    const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
-    int tempFirstFreq = firstFreq;
-    multiplyRate(firstDemotionRate, &tempFirstFreq);
-
-    const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
-    int tempSecondFreq = secondFreq;
-    multiplyRate(secondDemotionRate, &tempSecondFreq);
-
-    const int totalLength = firstWordLength + secondWordLength;
-
-    // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
-    // length.
-    int totalFreq = tempFirstFreq + tempSecondFreq;
-
-    // This is a workaround to try offsetting the not-enough-demotion which will be done in
-    // calcNormalizedScore in Utils.java.
-    // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
-    // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
-    // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
-    const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
-    multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
-
-    // At this moment, totalFreq is calculated by the following formula:
-    // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
-    //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
-
-    multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
-
-    // This is another workaround to offset the demotion which will be done in
-    // calcNormalizedScore in Utils.java.
-    // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
-    // the same amount because we already have adjusted the synthetic freq of this "missing or
-    // mistyped space" suggestion candidate above in this method.
-    const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
-    multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
-
-    if (isSpaceProximity) {
-        // A word pair with one space proximity correction
-        if (DEBUG_DICT) {
-            LOGI("Found a word pair with space proximity correction.");
-        }
-        multiplyIntCapped(typedLetterMultiplier, &totalFreq);
-        multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
-    }
-
-    multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
-    return totalFreq;
-}
-
-bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
-    return getSplitTwoWordsSuggestion(
-            inputLength, 0, missingSpacePos, missingSpacePos, inputLength - missingSpacePos, false);
-}
-
-bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) {
-    return getSplitTwoWordsSuggestion(
-            inputLength, 0, spaceProximityPos, spaceProximityPos + 1,
-            inputLength - spaceProximityPos - 1, true);
-}
-
-inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
-        const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
-        const int freq, const bool sameLength) const {
-    // TODO: Demote by edit distance
-    int finalFreq = freq * matchWeight;
-    if (skipPos >= 0) {
-        if (mInputLength >= 2) {
-            const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
-                    * (10 * mInputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
-                    / (10 * mInputLength
-                            - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
-            if (DEBUG_DICT_FULL) {
-                LOGI("Demotion rate for missing character is %d.", demotionRate);
-            }
-            multiplyRate(demotionRate, &finalFreq);
-        } else {
-            finalFreq = 0;
-        }
-    }
-    if (transposedPos >= 0) multiplyRate(
-            WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
-    if (excessivePos >= 0) {
-        multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
-        if (!mProximityInfo->existsAdjacentProximityChars(inputIndex)) {
-            // If an excessive character is not adjacent to the left char or the right char,
-            // we will demote this word.
-            multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
-        }
-    }
-    int lengthFreq = TYPED_LETTER_MULTIPLIER;
-    multiplyIntCapped(powerIntCapped(TYPED_LETTER_MULTIPLIER, depth), &lengthFreq);
-    if (lengthFreq == matchWeight) {
-        // Full exact match
-        if (depth > 1) {
-            if (DEBUG_DICT) {
-                LOGI("Found full matched word.");
-            }
-            multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
-        }
-        if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
-            finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
-        }
-    } else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0 && depth > 0) {
-        // A word with proximity corrections
-        if (DEBUG_DICT) {
-            LOGI("Found one proximity correction.");
-        }
-        multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &finalFreq);
-        multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
-    }
-    if (DEBUG_DICT) {
-        LOGI("calc: %d, %d", depth, sameLength);
-    }
-    if (sameLength) multiplyIntCapped(FULL_WORD_MULTIPLIER, &finalFreq);
-    return finalFreq;
+void UnigramDictionary::getMistypedSpaceWords(
+        const int inputLength, const int spaceProximityPos, Correction *correction) {
+    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+            -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */);
+    getSplitTwoWordsSuggestion(inputLength, correction);
 }
 
 inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
@@ -584,35 +413,38 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
     return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
 }
 
-
-inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
-        const uint8_t* const root, const uint8_t flags, const int pos,
-        const int inputIndex, const int matchWeight, const int skipPos,
-        const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
-        int* nextLetters, const int nextLettersSize) {
-
-    const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false;
-    if (isSameAsTyped) return;
-
-    if (depth >= MIN_SUGGEST_DEPTH) {
-        const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
-                excessivePos, transposedPos, freq, sameLength);
-        if (!isSameAsTyped)
-            addWord(word, depth + 1, finalFreq);
+inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
+    int wordLength;
+    unsigned short* wordPointer;
+    const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
+    if (finalFreq >= 0) {
+        addWord(wordPointer, wordLength, finalFreq);
     }
+}
 
-    if (sameLength && depth >= mInputLength && skipPos < 0) {
-        registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);
+void UnigramDictionary::getSplitTwoWordsSuggestion(
+        const int inputLength, Correction* correction) {
+    const int spaceProximityPos = correction->getSpaceProximityPos();
+    const int missingSpacePos = correction->getMissingSpacePos();
+    if (DEBUG_DICT) {
+        int inputCount = 0;
+        if (spaceProximityPos >= 0) ++inputCount;
+        if (missingSpacePos >= 0) ++inputCount;
+        assert(inputCount <= 1);
     }
-}
+    const bool isSpaceProximity = spaceProximityPos >= 0;
+    const int firstWordStartPos = 0;
+    const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
+    const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+    const int secondWordLength = isSpaceProximity
+            ? (inputLength - spaceProximityPos - 1)
+            : (inputLength - missingSpacePos);
 
-bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
-        const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
-        const int secondWordLength, const bool isSpaceProximity) {
-    if (inputLength >= MAX_WORD_LENGTH) return false;
+    if (inputLength >= MAX_WORD_LENGTH) return;
     if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
             || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
-        return false;
+        return;
+
     const int newWordLength = firstWordLength + secondWordLength + 1;
     // Allocating variable length array on stack
     unsigned short word[newWordLength];
@@ -620,7 +452,7 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
     if (DEBUG_DICT) {
         LOGI("First freq: %d", firstFreq);
     }
-    if (firstFreq <= 0) return false;
+    if (firstFreq <= 0) return;
 
     for (int i = 0; i < firstWordLength; ++i) {
         word[i] = mWord[i];
@@ -630,299 +462,21 @@ bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
     if (DEBUG_DICT) {
         LOGI("Second  freq:  %d", secondFreq);
     }
-    if (secondFreq <= 0) return false;
+    if (secondFreq <= 0) return;
 
     word[firstWordLength] = SPACE;
     for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
         word[i] = mWord[i - firstWordLength - 1];
     }
 
-    int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
-            secondWordLength, firstFreq, secondFreq, isSpaceProximity);
+    const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq);
     if (DEBUG_DICT) {
-        LOGI("Split two words:  %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
-                TYPED_LETTER_MULTIPLIER);
+        LOGI("Split two words:  %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
     }
     addWord(word, newWordLength, pairFreq);
-    return true;
-}
-
-#ifndef NEW_DICTIONARY_FORMAT
-// The following functions will be entirely replaced with new implementations.
-void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
-        const int excessivePos, const int transposedPos,int *nextLetters,
-        const int nextLettersSize) {
-    int initialPosition = initialPos;
-    const int count = Dictionary::getCount(DICT_ROOT, &initialPosition);
-    getWordsRec(count, initialPosition, 0,
-            min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
-            mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,
-            nextLettersSize);
-}
-
-void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, const int matchWeight,
-        const int inputIndex, const int diffs, const int skipPos, const int excessivePos,
-        const int transposedPos, int *nextLetters, const int nextLettersSize) {
-    int siblingPos = pos;
-    for (int i = 0; i < childrenCount; ++i) {
-        int newCount;
-        int newChildPosition;
-        bool newTraverseAllNodes;
-        int newMatchRate;
-        int newInputIndex;
-        int newDiffs;
-        int newSiblingPos;
-        int newOutputIndex;
-        const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
-                traverseAllNodes, matchWeight, inputIndex, diffs,
-                skipPos, excessivePos, transposedPos,
-                nextLetters, nextLettersSize,
-                &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
-                &newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);
-        siblingPos = newSiblingPos;
-
-        if (needsToTraverseChildrenNodes) {
-            getWordsRec(newCount, newChildPosition, newOutputIndex, maxDepth, newTraverseAllNodes,
-                    newMatchRate, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,
-                    nextLetters, nextLettersSize);
-        }
-    }
-}
-
-inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
-        const int inputLength, unsigned short *word) {
-    int pos = ROOT_POS;
-    int count = Dictionary::getCount(DICT_ROOT, &pos);
-    int maxFreq = 0;
-    int depth = 0;
-    unsigned short newWord[MAX_WORD_LENGTH_INTERNAL];
-    bool terminal = false;
-
-    mStackChildCount[0] = count;
-    mStackSiblingPos[0] = pos;
-
-    while (depth >= 0) {
-        if (mStackChildCount[depth] > 0) {
-            --mStackChildCount[depth];
-            int firstChildPos;
-            int newFreq;
-            int siblingPos = mStackSiblingPos[depth];
-            const bool needsToTraverseChildrenNodes = processCurrentNodeForExactMatch(siblingPos,
-                    startInputIndex, depth, newWord, &firstChildPos, &count, &terminal, &newFreq,
-                    &siblingPos);
-            mStackSiblingPos[depth] = siblingPos;
-            if (depth == (inputLength - 1)) {
-                // Traverse sibling node
-                if (terminal) {
-                    if (newFreq > maxFreq) {
-                        for (int i = 0; i < inputLength; ++i) word[i] = newWord[i];
-                        if (DEBUG_DICT && DEBUG_NODE) {
-#ifdef FLAG_DBG
-                            char s[inputLength + 1];
-                            for (int i = 0; i < inputLength; ++i) s[i] = word[i];
-                            s[inputLength] = 0;
-                            LOGI("New missing space word found: %d > %d (%s), %d, %d",
-                                    newFreq, maxFreq, s, inputLength, depth);
-#endif
-                        }
-                        maxFreq = newFreq;
-                    }
-                }
-            } else if (needsToTraverseChildrenNodes) {
-                // Traverse children nodes
-                ++depth;
-                mStackChildCount[depth] = count;
-                mStackSiblingPos[depth] = firstChildPos;
-            }
-        } else {
-            // Traverse parent node
-            --depth;
-        }
-    }
-
-    word[inputLength] = 0;
-    return maxFreq;
+    return;
 }
 
-inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstChildPos,
-        const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,
-        int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {
-    const int inputIndex = startInputIndex + depth;
-    unsigned short c;
-    *siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,
-            &c, newChildPosition, newTerminal, newFreq);
-    const unsigned int inputC = mProximityInfo->getPrimaryCharAt(inputIndex);
-    if (DEBUG_DICT) {
-        assert(inputC <= U_SHORT_MAX);
-    }
-    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
-    const bool matched = (inputC == baseLowerC || inputC == c);
-    const bool hasChild = *newChildPosition != 0;
-    if (matched) {
-        word[depth] = c;
-        if (DEBUG_DICT && DEBUG_NODE) {
-            LOGI("Node(%c, %c)<%d>, %d, %d", inputC, c, matched, hasChild, *newFreq);
-            if (*newTerminal) {
-                LOGI("Terminal %d", *newFreq);
-            }
-        }
-        if (hasChild) {
-            *newCount = Dictionary::getCount(DICT_ROOT, newChildPosition);
-            return true;
-        } else {
-            return false;
-        }
-    } else {
-        // If this node is not user typed character, this method treats this word as unmatched.
-        // Thus newTerminal shouldn't be true.
-        *newTerminal = false;
-        return false;
-    }
-}
-
-// TODO: use uint32_t instead of unsigned short
-bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
-    if (IS_LATEST_DICT_VERSION) {
-        return (getBigramPosition(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
-    } else {
-        return (getBigramPosition(0, word, 0, length) != NOT_VALID_WORD);
-    }
-}
-
-
-// Require strict exact match.
-int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
-        int length) const {
-    // returns address of bigram data of that word
-    // return -99 if not found
-
-    int count = Dictionary::getCount(DICT_ROOT, &pos);
-    unsigned short currentChar = (unsigned short) word[offset];
-    for (int j = 0; j < count; j++) {
-        unsigned short c = Dictionary::getChar(DICT_ROOT, &pos);
-        int terminal = Dictionary::getTerminal(DICT_ROOT, &pos);
-        int childPos = Dictionary::getAddress(DICT_ROOT, &pos);
-        if (c == currentChar) {
-            if (offset == length - 1) {
-                if (terminal) {
-                    return (pos+1);
-                }
-            } else {
-                if (childPos != 0) {
-                    int t = getBigramPosition(childPos, word, offset + 1, length);
-                    if (t > 0) {
-                        return t;
-                    }
-                }
-            }
-        }
-        if (terminal) {
-            Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos);
-        }
-        // There could be two instances of each alphabet - upper and lower case. So continue
-        // looking ...
-    }
-    return NOT_VALID_WORD;
-}
-
-// The following functions will be modified.
-inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
-        const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
-        const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
-        int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
-        bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
-        int *nextSiblingPosition, int *nextOutputIndex) {
-    if (DEBUG_DICT) {
-        int inputCount = 0;
-        if (skipPos >= 0) ++inputCount;
-        if (excessivePos >= 0) ++inputCount;
-        if (transposedPos >= 0) ++inputCount;
-        assert(inputCount <= 1);
-    }
-    unsigned short c;
-    int childPosition;
-    bool terminal;
-    int freq;
-    bool isSameAsUserTypedLength = false;
-
-    const int pos = initialPos;
-    const int depth = initialDepth;
-    const int traverseAllNodes = initialTraverseAllNodes;
-    const int diffs = initialDiffs;
-
-    const uint8_t flags = 0; // No flags for now
-
-    if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
-
-    *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
-            &c, &childPosition, &terminal, &freq);
-    *nextOutputIndex = depth + 1;
-
-    const bool needsToTraverseChildrenNodes = childPosition != 0;
-
-    // If we are only doing traverseAllNodes, no need to look at the typed characters.
-    if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
-        mWord[depth] = c;
-        if (traverseAllNodes && terminal) {
-            onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                       excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
-        }
-        if (!needsToTraverseChildrenNodes) return false;
-        *newTraverseAllNodes = traverseAllNodes;
-        *newMatchRate = matchWeight;
-        *newDiffs = diffs;
-        *newInputIndex = inputIndex;
-    } else {
-        int inputIndexForProximity = inputIndex;
-
-        if (transposedPos >= 0) {
-            if (inputIndex == transposedPos) ++inputIndexForProximity;
-            if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
-        }
-
-        ProximityInfo::ProximityType matchedProximityCharId = mProximityInfo->getMatchedProximityId(
-                inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
-        if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) return false;
-        mWord[depth] = c;
-        // If inputIndex is greater than mInputLength, that means there is no
-        // proximity chars. So, we don't need to check proximity.
-        if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
-            multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
-        }
-        bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
-                || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
-        if (isSameAsUserTypedLength && terminal) {
-            onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                    excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
-        }
-        if (!needsToTraverseChildrenNodes) return false;
-        // Start traversing all nodes after the index exceeds the user typed length
-        *newTraverseAllNodes = isSameAsUserTypedLength;
-        *newMatchRate = matchWeight;
-        *newDiffs = diffs
-                + ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
-        *newInputIndex = inputIndex + 1;
-    }
-    // Optimization: Prune out words that are too long compared to how much was typed.
-    if (depth >= maxDepth || *newDiffs > mMaxEditDistance) {
-        return false;
-    }
-
-    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
-    // TODO: Check if this can be isSameAsUserTypedLength only.
-    if (isSameAsUserTypedLength || mInputLength <= *newInputIndex) {
-        *newTraverseAllNodes = true;
-    }
-    // get the count of nodes and increment childAddress.
-    *newCount = Dictionary::getCount(DICT_ROOT, &childPosition);
-    *newChildPosition = childPosition;
-    if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);
-    return needsToTraverseChildrenNodes;
-}
-
-#else // NEW_DICTIONARY_FORMAT
-
 // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
 // interface.
 inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
@@ -1055,83 +609,8 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
     return maxFreq;
 }
 
-// This function gets the frequency of the exact matching word in the dictionary.
-// If no match is found, it returns -1.
-int UnigramDictionary::getFrequency(const uint16_t* const inWord, const int length) const {
-    int pos = 0;
-    int wordPos = 0;
-    const uint8_t* const root = DICT_ROOT;
-
-    while (true) {
-        // If we already traversed the tree further than the word is long, there means
-        // there was no match (or we would have found it).
-        if (wordPos > length) return -1;
-        int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
-        const uint16_t wChar = inWord[wordPos];
-        while (true) {
-            // If there are no more character groups in this node, it means we could not
-            // find a matching character for this depth, therefore there is no match.
-            if (0 >= charGroupCount) return -1;
-            const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
-            int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
-            if (character == wChar) {
-                // This is the correct node. Only one character group may start with the same
-                // char within a node, so either we found our match in this node, or there is
-                // no match and we can return -1. So we will check all the characters in this
-                // character group indeed does match.
-                if (FLAG_HAS_MULTIPLE_CHARS & flags) {
-                    character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
-                    while (NOT_A_CHARACTER != character) {
-                        ++wordPos;
-                        // If we shoot the length of the word we search for, or if we find a single
-                        // character that does not match, as explained above, it means the word is
-                        // not in the dictionary (by virtue of this chargroup being the only one to
-                        // match the word on the first character, but not matching the whole word).
-                        if (wordPos > length) return -1;
-                        if (inWord[wordPos] != character) return -1;
-                        character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
-                    }
-                }
-                // If we come here we know that so far, we do match. Either we are on a terminal
-                // and we match the length, in which case we found it, or we traverse children.
-                // If we don't match the length AND don't have children, then a word in the
-                // dictionary fully matches a prefix of the searched word but not the full word.
-                ++wordPos;
-                if (FLAG_IS_TERMINAL & flags) {
-                    if (wordPos == length) {
-                        return BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
-                    }
-                    pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos);
-                }
-                if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags))
-                    return -1;
-                // We have children and we are still shorter than the word we are searching for, so
-                // we need to traverse children. Put the pointer on the children position, and
-                // break
-                pos = BinaryFormat::readChildrenPosition(root, flags, pos);
-                break;
-            } else {
-                // This chargroup does not match, so skip the remaining part and go to the next.
-                if (FLAG_HAS_MULTIPLE_CHARS & flags) {
-                    pos = BinaryFormat::skipOtherCharacters(root, pos);
-                }
-                pos = BinaryFormat::skipFrequency(flags, pos);
-                pos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
-            }
-            --charGroupCount;
-        }
-    }
-}
-
 bool UnigramDictionary::isValidWord(const uint16_t* const inWord, const int length) const {
-    return -1 != getFrequency(inWord, length);
-}
-
-int UnigramDictionary::getBigrams(unsigned short *word, int length, int *codes, int codesSize,
-        unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
-        int maxAlternatives) {
-    // TODO: add implementation.
-    return 0;
+    return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
 }
 
 // TODO: remove this function.
@@ -1154,23 +633,13 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
 // there aren't any more nodes at this level, it merely returns the address of the first byte after
 // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
 // given level, as output into newCount when traversing this level's parent.
-inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
-        const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
-        const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
-        int *nextLetters, const int nextLettersSize, int *newCount, int *newChildrenPosition,
-        bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
-        int *nextSiblingPosition, int *newOutputIndex) {
+inline bool UnigramDictionary::processCurrentNode(const int initialPos,
+        Correction *correction, int *newCount,
+        int *newChildrenPosition, int *nextSiblingPosition) {
     if (DEBUG_DICT) {
-        int inputCount = 0;
-        if (skipPos >= 0) ++inputCount;
-        if (excessivePos >= 0) ++inputCount;
-        if (transposedPos >= 0) ++inputCount;
-        assert(inputCount <= 1);
+        correction->checkState();
     }
     int pos = initialPos;
-    int depth = initialDepth;
-    int traverseAllNodes = initialTraverseAllNodes;
-    int diffs = initialDiffs;
 
     // Flags contain the following information:
     // - Address type (MASK_GROUP_ADDRESS_TYPE) on two bits:
@@ -1182,6 +651,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
     // - FLAG_HAS_BIGRAMS: whether this node has bigrams or not
     const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(DICT_ROOT, &pos);
     const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
+    const bool isTerminalNode = (0 != (FLAG_IS_TERMINAL & flags));
+
+    bool needsToInvokeOnTerminal = false;
 
     // This gets only ONE character from the stream. Next there will be:
     // if FLAG_HAS_MULTIPLE CHARS: the other characters of the same node
@@ -1207,101 +679,21 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
         const bool isLastChar = (NOT_A_CHARACTER == nextc);
         // If there are more chars in this nodes, then this virtual node is not a terminal.
         // If we are on the last char, this virtual node is a terminal if this node is.
-        const bool isTerminal = isLastChar && (0 != (FLAG_IS_TERMINAL & flags));
-        // If there are more chars in this node, then this virtual node has children.
-        // If we are on the last char, this virtual node has children if this node has.
-        const bool hasChildren = (!isLastChar) || BinaryFormat::hasChildrenInFlags(flags);
-
-        // This has to be done for each virtual char (this forwards the "inputIndex" which
-        // is the index in the user-inputted chars, as read by proximity chars.
-        if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
-        if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
-            mWord[depth] = c;
-            if (traverseAllNodes && isTerminal) {
-                // The frequency should be here, because we come here only if this is actually
-                // a terminal node, and we are on its last char.
-                const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-                onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                           excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
-            }
-            if (!hasChildren) {
-                // If we don't have children here, that means we finished processing all
-                // characters of this node (we are on the last virtual node), AND we are in
-                // traverseAllNodes mode, which means we are searching for *completions*. We
-                // should skip the frequency if we have a terminal, and report the position
-                // of the next sibling. We don't have to return other values because we are
-                // returning false, as in "don't traverse children".
-                if (isTerminal) pos = BinaryFormat::skipFrequency(flags, pos);
-                *nextSiblingPosition =
-                        BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
-                return false;
-            }
-        } else {
-            int inputIndexForProximity = inputIndex;
-
-            if (transposedPos >= 0) {
-                if (inputIndex == transposedPos) ++inputIndexForProximity;
-                if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
-            }
-
-            int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
-                    inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
-            if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
-                // We found that this is an unrelated character, so we should give up traversing
-                // this node and its children entirely.
-                // However we may not be on the last virtual node yet so we skip the remaining
-                // characters in this node, the frequency if it's there, read the next sibling
-                // position to output it, then return false.
-                // We don't have to output other values because we return false, as in
-                // "don't traverse children".
-                if (!isLastChar) {
-                    pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos);
-                }
-                pos = BinaryFormat::skipFrequency(flags, pos);
-                *nextSiblingPosition =
-                        BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
-                return false;
-            }
-            mWord[depth] = c;
-            // If inputIndex is greater than mInputLength, that means there is no
-            // proximity chars. So, we don't need to check proximity.
-            if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
-                multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
-            }
-            const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
-                    || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
-            if (isSameAsUserTypedLength && isTerminal) {
-                const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-                onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
-                        excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
-            }
-            // This character matched the typed character (enough to traverse the node at least)
-            // so we just evaluated it. Now we should evaluate this virtual node's children - that
-            // is, if it has any. If it has no children, we're done here - so we skip the end of
-            // the node, output the siblings position, and return false "don't traverse children".
-            // Note that !hasChildren implies isLastChar, so we know we don't have to skip any
-            // remaining char in this group for there can't be any.
-            if (!hasChildren) {
-                pos = BinaryFormat::skipFrequency(flags, pos);
-                *nextSiblingPosition =
-                        BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
-                return false;
-            }
-            // Start traversing all nodes after the index exceeds the user typed length
-            traverseAllNodes = isSameAsUserTypedLength;
-            diffs = diffs
-                    + ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
-            // Finally, we are ready to go to the next character, the next "virtual node".
-            // We should advance the input index.
-            // We do this in this branch of the 'if traverseAllNodes' because we are still matching
-            // characters to input; the other branch is not matching them but searching for
-            // completions, this is why it does not have to do it.
-            ++inputIndex;
-        }
-        // Optimization: Prune out words that are too long compared to how much was typed.
-        if (depth >= maxDepth || diffs > mMaxEditDistance) {
-            // We are giving up parsing this node and its children. Skip the rest of the node,
-            // output the sibling position, and return that we don't want to traverse children.
+        const bool isTerminal = isLastChar && isTerminalNode;
+
+        Correction::CorrectionType stateType = correction->processCharAndCalcState(
+                c, isTerminal);
+        if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
+                || stateType == Correction::ON_TERMINAL) {
+            needsToInvokeOnTerminal = true;
+        } else if (stateType == Correction::UNRELATED) {
+            // We found that this is an unrelated character, so we should give up traversing
+            // this node and its children entirely.
+            // However we may not be on the last virtual node yet so we skip the remaining
+            // characters in this node, the frequency if it's there, read the next sibling
+            // position to output it, then return false.
+            // We don't have to output other values because we return false, as in
+            // "don't traverse children".
             if (!isLastChar) {
                 pos = BinaryFormat::skipOtherCharacters(DICT_ROOT, pos);
             }
@@ -1315,23 +707,41 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
         // will take care of prefetching the next. If we finally found our last char, nextc will
         // contain NOT_A_CHARACTER.
         c = nextc;
-        // Also, the next char is one "virtual node" depth more than this char.
-        ++depth;
     } while (NOT_A_CHARACTER != c);
 
-    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
-    // Here, that's all we are interested in so we don't need to check for isSameAsUserTypedLength.
-    if (mInputLength <= *newInputIndex) {
-        traverseAllNodes = true;
-    }
+    if (isTerminalNode) {
+        if (needsToInvokeOnTerminal) {
+            // The frequency should be here, because we come here only if this is actually
+            // a terminal node, and we are on its last char.
+            const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+            onTerminal(freq, mCorrection);
+        }
+
+        // If there are more chars in this node, then this virtual node has children.
+        // If we are on the last char, this virtual node has children if this node has.
+        const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
+
+        // This character matched the typed character (enough to traverse the node at least)
+        // so we just evaluated it. Now we should evaluate this virtual node's children - that
+        // is, if it has any. If it has no children, we're done here - so we skip the end of
+        // the node, output the siblings position, and return false "don't traverse children".
+        // Note that !hasChildren implies isLastChar, so we know we don't have to skip any
+        // remaining char in this group for there can't be any.
+        if (!hasChildren) {
+            pos = BinaryFormat::skipFrequency(flags, pos);
+            *nextSiblingPosition =
+                    BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
+            return false;
+        }
 
-    // All the output values that are purely computation by this function are held in local
-    // variables. Output them to the caller.
-    *newTraverseAllNodes = traverseAllNodes;
-    *newMatchRate = matchWeight;
-    *newDiffs = diffs;
-    *newInputIndex = inputIndex;
-    *newOutputIndex = depth;
+        // Optimization: Prune out words that are too long compared to how much was typed.
+        if (correction->needsToPrune()) {
+            pos = BinaryFormat::skipFrequency(flags, pos);
+            *nextSiblingPosition =
+                    BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
+            return false;
+        }
+    }
 
     // Now we finished processing this node, and we want to traverse children. If there are no
     // children, we can't come here.
@@ -1351,6 +761,4 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
     return true;
 }
 
-#endif // NEW_DICTIONARY_FORMAT
-
 } // namespace latinime
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index f6045c6ef..cfe63ff79 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -18,6 +18,8 @@
 #define LATINIME_UNIGRAM_DICTIONARY_H
 
 #include <stdint.h>
+#include "correction.h"
+#include "correction_state.h"
 #include "defines.h"
 #include "proximity_info.h"
 
@@ -30,7 +32,6 @@ namespace latinime {
 class UnigramDictionary {
 
 public:
-#ifdef NEW_DICTIONARY_FORMAT
 
     // Mask and flags for children address type selection.
     static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
@@ -62,26 +63,19 @@ public:
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
     static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-#endif // NEW_DICTIONARY_FORMAT
 
     UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
             int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
             const bool isLatestDictVersion);
-#ifndef NEW_DICTIONARY_FORMAT
-    bool isValidWord(unsigned short *word, int length);
-#else // NEW_DICTIONARY_FORMAT
     bool isValidWord(const uint16_t* const inWord, const int length) const;
-    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
-            unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
-            int maxAlternatives);
-#endif // NEW_DICTIONARY_FORMAT
     int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
     int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int codesSize, const int flags,
             unsigned short *outWords, int *frequencies);
-    ~UnigramDictionary();
+    virtual ~UnigramDictionary();
 
 private:
+
     void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
             const int *ycoordinates, const int *codes, const int codesSize,
             unsigned short *outWords, int *frequencies);
@@ -94,51 +88,24 @@ private:
             const int *ycoordinates, const int *codes, const int codesSize,
             unsigned short *outWords, int *frequencies);
     void getSuggestionCandidates(const int skipPos, const int excessivePos,
-            const int transposedPos, int *nextLetters, const int nextLettersSize,
-            const int maxDepth);
+            const int transposedPos);
     bool addWord(unsigned short *word, int length, int frequency);
-    bool getSplitTwoWordsSuggestion(const int inputLength,
-            const int firstWordStartPos, const int firstWordLength,
-            const int secondWordStartPos, const int secondWordLength, const bool isSpaceProximity);
-    bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
-    bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos);
-    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq,
-            const bool sameLength) const;
-    void onTerminal(unsigned short int* word, const int depth,
-            const uint8_t* const root, const uint8_t flags, const int pos,
-            const int inputIndex, const int matchWeight, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
-            int *nextLetters, const int nextLettersSize);
+    void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
+    void getMissingSpaceWords(
+            const int inputLength, const int missingSpacePos, Correction *correction);
+    void getMistypedSpaceWords(
+            const int inputLength, const int spaceProximityPos, Correction *correction);
+    void onTerminal(const int freq, Correction *correction);
     bool needsToSkipCurrentNode(const unsigned short c,
             const int inputIndex, const int skipPos, const int depth);
     // Process a node by considering proximity, missing and excessive character
-    bool processCurrentNode(const int initialPos, const int initialDepth,
-            const int maxDepth, const bool initialTraverseAllNodes, const int snr, int inputIndex,
-            const int initialDiffs, const int skipPos, const int excessivePos,
-            const int transposedPos, int *nextLetters, const int nextLettersSize, int *newCount,
-            int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex,
-            int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
+    bool processCurrentNode(const int initialPos,
+            Correction *correction, int *newCount,
+            int *newChildPosition, int *nextSiblingPosition);
     int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
             unsigned short *word);
-#ifndef NEW_DICTIONARY_FORMAT
-    void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
-            const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
-            const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
-            const int nextLettersSize);
-    // Keep getWordsOld for comparing performance between getWords and getWordsOld
-    void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
-            const int excessivePos, const int transposedPos, int *nextLetters,
-            const int nextLettersSize);
-    // Process a node by considering missing space
-    bool processCurrentNodeForExactMatch(const int firstChildPos,
-            const int startInputIndex, const int depth, unsigned short *word,
-            int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
-#else // NEW_DICTIONARY_FORMAT
-    int getFrequency(const uint16_t* const inWord, const int length) const;
     int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
             short unsigned int* outWord);
-#endif // NEW_DICTIONARY_FORMAT
 
     const uint8_t* const DICT_ROOT;
     const int MAX_WORD_LENGTH;
@@ -162,20 +129,15 @@ private:
 
     int *mFrequencies;
     unsigned short *mOutputChars;
-    const ProximityInfo *mProximityInfo;
+    ProximityInfo *mProximityInfo;
+    Correction *mCorrection;
     int mInputLength;
     // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
-    int mMaxEditDistance;
-
-    int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];
-    bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
-    int mStackNodeFreq[MAX_WORD_LENGTH_INTERNAL];
-    int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
-    int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
-    int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
-    int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
-    int mNextLettersFrequency[NEXT_LETTERS_SIZE];
+
+    int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
+    int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
+    int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
 };
 } // namespace latinime