1 files changed, 317 insertions, 114 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 27dc40745..63dd283c8 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -16,11 +16,13 @@
 
 #include <assert.h>
 #include <ctype.h>
+#include <math.h>
 #include <stdio.h>
 #include <string.h>
 
 #define LOG_TAG "LatinIME: correction.cpp"
 
+#include "char_utils.h"
 #include "correction.h"
 #include "dictionary.h"
 #include "proximity_info.h"
@@ -31,81 +33,60 @@ namespace latinime {
 // edit distance funcitons //
 /////////////////////////////
 
-#if 0 /* no longer used */
-inline static int editDistance(
-        int* editDistanceTable, const unsigned short* input,
-        const int inputLength, const unsigned short* output, const int outputLength) {
-    // dp[li][lo] dp[a][b] = dp[ a * lo + b]
-    int* dp = editDistanceTable;
-    const int li = inputLength + 1;
-    const int lo = outputLength + 1;
-    for (int i = 0; i < li; ++i) {
-        dp[lo * i] = i;
-    }
-    for (int i = 0; i < lo; ++i) {
-        dp[i] = i;
-    }
-
-    for (int i = 0; i < li - 1; ++i) {
-        for (int j = 0; j < lo - 1; ++j) {
-            const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
-            const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
-            const uint16_t cost = (ci == co) ? 0 : 1;
-            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
-                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
-            if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1])
-                    && co == Dictionary::toBaseLowerCase(input[i - 1])) {
-                dp[(i + 1) * lo + (j + 1)] = min(
-                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
-            }
-        }
+inline static void initEditDistance(int *editDistanceTable) {
+    for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
+        editDistanceTable[i] = i;
     }
+}
 
-    if (DEBUG_EDIT_DISTANCE) {
-        LOGI("IN = %d, OUT = %d", inputLength, outputLength);
-        for (int i = 0; i < li; ++i) {
-            for (int j = 0; j < lo; ++j) {
-                LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
+        const int editDistanceTableWidth, const int outputLength) {
+    if (DEBUG_DICT) {
+        AKLOGI("EditDistanceTable");
+        for (int i = 0; i <= 10; ++i) {
+            int c[11];
+            for (int j = 0; j <= 10; ++j) {
+                if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
+                    c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
+                } else {
+                    c[j] = -1;
+                }
             }
+            AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+                    c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
         }
     }
-    return dp[li * lo - 1];
-}
-#endif
-
-inline static void initEditDistance(int *editDistanceTable) {
-    for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
-        editDistanceTable[i] = i;
-    }
 }
 
 inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
         const int inputLength, const unsigned short *output, const int outputLength) {
+    // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
     // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
     // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
     // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
     int *const current = editDistanceTable + outputLength * (inputLength + 1);
     const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
     const int *const prevprev =
-            outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : NULL;
+            outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
     current[0] = outputLength;
-    const uint32_t co = Dictionary::toBaseLowerCase(output[outputLength - 1]);
-    const uint32_t prevCO =
-            outputLength >= 2 ? Dictionary::toBaseLowerCase(output[outputLength - 2]) : 0;
+    const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
+    const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
     for (int i = 1; i <= inputLength; ++i) {
-        const uint32_t ci = Dictionary::toBaseLowerCase(input[i - 1]);
+        const uint32_t ci = toBaseLowerCase(input[i - 1]);
         const uint16_t cost = (ci == co) ? 0 : 1;
         current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
-        if (i >= 2 && prevprev && ci == prevCO
-                && co == Dictionary::toBaseLowerCase(input[i - 2])) {
+        if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
             current[i] = min(current[i], prevprev[i - 2] + 1);
         }
     }
 }
 
-inline static int getCurrentEditDistance(
-        int *editDistanceTable, const int inputLength, const int outputLength) {
-    return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
+inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
+        const int outputLength, const int inputLength) {
+    if (DEBUG_EDIT_DISTANCE) {
+        AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+    }
+    return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
 }
 
 //////////////////////
@@ -133,6 +114,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
     mInputLength = inputLength;
     mMaxDepth = maxDepth;
     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+    // TODO: This is not supposed to be required.  Check what's going wrong with
+    // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
+    initEditDistance(mEditDistanceTable);
 }
 
 void Correction::initCorrectionState(
@@ -146,7 +130,7 @@ void Correction::initCorrectionState(
 
 void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
         const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
-        const bool useFullEditDistance) {
+        const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
     // TODO: remove
     mTransposedPos = transposedPos;
     mExcessivePos = excessivePos;
@@ -159,6 +143,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
     mSpaceProximityPos = spaceProximityPos;
     mMissingSpacePos = missingSpacePos;
     mUseFullEditDistance = useFullEditDistance;
+    mDoAutoCompletion = doAutoCompletion;
+    mMaxErrors = maxErrors;
 }
 
 void Correction::checkState() {
@@ -179,16 +165,27 @@ int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFre
 }
 
 int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
+    return getFinalFreqInternal(freq, word, wordLength, mInputLength);
+}
+
+int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength,
+        const int inputLength) {
+    return getFinalFreqInternal(freq, word, wordLength, inputLength);
+}
+
+int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength,
+        const int inputLength) {
     const int outputIndex = mTerminalOutputIndex;
     const int inputIndex = mTerminalInputIndex;
     *wordLength = outputIndex + 1;
-    if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
-        return -1;
+    if (outputIndex < MIN_SUGGEST_DEPTH) {
+        return NOT_A_FREQUENCY;
     }
 
     *word = mWord;
-    return Correction::RankingAlgorithm::calculateFinalFreq(
-            inputIndex, outputIndex, freq, mEditDistanceTable, this);
+    int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq(
+            inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength);
+    return finalFreq;
 }
 
 bool Correction::initProcessState(const int outputIndex) {
@@ -229,20 +226,10 @@ int Correction::goDownTree(
 }
 
 // TODO: remove
-int Correction::getOutputIndex() {
-    return mOutputIndex;
-}
-
-// TODO: remove
 int Correction::getInputIndex() {
     return mInputIndex;
 }
 
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
-    return mNeedsToTraverseAllNodes;
-}
-
 void Correction::incrementInputIndex() {
     ++mInputIndex;
 }
@@ -280,7 +267,9 @@ void Correction::startToTraverseAllNodes() {
 
 bool Correction::needsToPrune() const {
     // TODO: use edit distance here
-    return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
+    return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
+            // No auto completion: allow a word one char longer than the input (missing char)
+            || (!mDoAutoCompletion && (mOutputIndex > mInputLength));
 }
 
 void Correction::addCharToCurrentWord(const int32_t c) {
@@ -290,13 +279,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
             mWord, mOutputIndex + 1);
 }
 
-// TODO: inline?
 Correction::CorrectionType Correction::processSkipChar(
         const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
     addCharToCurrentWord(c);
-    if (needsToTraverseAllNodes() && isTerminal) {
-        mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
-        mTerminalOutputIndex = mOutputIndex;
+    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+    mTerminalOutputIndex = mOutputIndex;
+    if (mNeedsToTraverseAllNodes && isTerminal) {
         incrementOutputIndex();
         return TRAVERSE_ALL_ON_TERMINAL;
     } else {
@@ -305,6 +293,13 @@ Correction::CorrectionType Correction::processSkipChar(
     }
 }
 
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+    // The terminal indices must be set before any CorrectionType is returned, UNRELATED included.
+    mTerminalInputIndex = mInputIndex;
+    mTerminalOutputIndex = mOutputIndex;
+    return UNRELATED;
+}
+
 inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
     return type == ProximityInfo::EQUIVALENT_CHAR;
 }
@@ -312,12 +307,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
 Correction::CorrectionType Correction::processCharAndCalcState(
         const int32_t c, const bool isTerminal) {
     const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
+    if (correctionCount > mMaxErrors) {
+        return processUnrelatedCorrectionType();
+    }
+
     // TODO: Change the limit if we'll allow two or more corrections
     const bool noCorrectionsHappenedSoFar = correctionCount == 0;
     const bool canTryCorrection = noCorrectionsHappenedSoFar;
     int proximityIndex = 0;
     mDistances[mOutputIndex] = NOT_A_DISTANCE;
 
+    // Skip checking this node
     if (mNeedsToTraverseAllNodes || isQuote(c)) {
         bool incremented = false;
         if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
@@ -342,6 +342,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
         return processSkipChar(c, isTerminal, incremented);
     }
 
+    // Check possible corrections.
     if (mExcessivePos >= 0) {
         if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
             mExcessivePos = mOutputIndex;
@@ -384,15 +385,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
             --mTransposedCount;
             if (DEBUG_CORRECTION) {
                 DUMP_WORD(mWord, mOutputIndex);
-                LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+                AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                         mTransposedCount, mExcessiveCount, c);
             }
-            return UNRELATED;
+            return processUnrelatedCorrectionType();
         }
     }
 
     // TODO: Change the limit if we'll allow two or more proximity chars with corrections
-    const bool checkProximityChars = noCorrectionsHappenedSoFar ||  mProximityCount == 0;
+    // Work around: When the mMaxErrors is 1, we only allow just one error
+    // including proximity correction.
+    const bool checkProximityChars = (mMaxErrors > 1)
+            ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
+            : (noCorrectionsHappenedSoFar && mProximityCount == 0);
+
     ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
             ? ProximityInfo::EQUIVALENT_CHAR
             : mProximityInfo->getMatchedProximityId(
@@ -405,7 +411,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                 && isEquivalentChar(mProximityInfo->getMatchedProximityId(
                         mInputIndex, mWord[mOutputIndex - 1], false))) {
             if (DEBUG_CORRECTION) {
-                LOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
+                AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
             }
             // Conversion p->e
             // Example:
@@ -482,10 +488,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
         } else {
             if (DEBUG_CORRECTION) {
                 DUMP_WORD(mWord, mOutputIndex);
-                LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+                AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                         mTransposedCount, mExcessiveCount, c);
             }
-            return UNRELATED;
+            return processUnrelatedCorrectionType();
         }
     } else if (secondTransposing) {
         // If inputIndex is greater than mInputLength, that means there is no
@@ -535,11 +541,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
         mTerminalOutputIndex = mOutputIndex - 1;
         if (DEBUG_CORRECTION) {
             DUMP_WORD(mWord, mOutputIndex);
-            LOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+            AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                     mTransposedCount, mExcessiveCount, c);
         }
         return ON_TERMINAL;
     } else {
+        mTerminalInputIndex = mInputIndex - 1;
+        mTerminalOutputIndex = mOutputIndex - 1;
         return NOT_ON_TERMINAL;
     }
 }
@@ -607,13 +615,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
 }
 
 inline static bool isUpperCase(unsigned short c) {
-     if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
-         c = BASE_CHARS[c];
-     }
-     if (isupper(c)) {
-         return true;
-     }
-     return false;
+     return isAsciiUpper(toBaseChar(c));
 }
 
 //////////////////////
@@ -622,9 +624,9 @@ inline static bool isUpperCase(unsigned short c) {
 
 /* static */
 int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
-        const int freq, int* editDistanceTable, const Correction* correction) {
+        const int freq, int* editDistanceTable, const Correction* correction,
+        const int inputLength) {
     const int excessivePos = correction->getExcessivePos();
-    const int inputLength = correction->mInputLength;
     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
     const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
     const ProximityInfo *proximityInfo = correction->mProximityInfo;
@@ -649,45 +651,50 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
     const unsigned short* word = correction->mWord;
     const bool skipped = skippedCount > 0;
 
-    const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
+    const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
             - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
 
     // TODO: Calculate edit distance for transposed and excessive
     int ed = 0;
+    if (DEBUG_DICT_FULL) {
+        dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
+    }
     int adjustedProximityMatchedCount = proximityMatchedCount;
 
     int finalFreq = freq;
 
     // TODO: Optimize this.
-    // TODO: Ignoring edit distance for transposed char, for now
-    if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
-        ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1);
+    if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
+        ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
+                inputLength) - transposedCount;
+
         const int matchWeight = powerIntCapped(typedLetterMultiplier,
-                max(inputLength, outputIndex + 1) - ed);
+                max(inputLength, outputLength) - ed);
         multiplyIntCapped(matchWeight, &finalFreq);
 
         // TODO: Demote further if there are two or more excessive chars with longer user input?
-        if (inputLength > outputIndex + 1) {
+        if (inputLength > outputLength) {
             multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
         }
 
         ed = max(0, ed - quoteDiffCount);
 
-        if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
-            // Promote a word with just one skipped or excessive char
-            if (sameLength) {
-                multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
-            } else {
+        if (transposedCount < 1) {
+            if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
+                // Promote a word with just one skipped or excessive char
+                if (sameLength) {
+                    multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
+                } else {
+                    multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+                }
+            } else if (ed == 0) {
                 multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+                sameLength = true;
             }
-        } else if (ed == 0) {
-            multiplyIntCapped(typedLetterMultiplier, &finalFreq);
-            sameLength = true;
         }
-        adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
+        adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
                 proximityMatchedCount);
     } else {
-        // TODO: Calculate the edit distance for transposed char
         const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
         multiplyIntCapped(matchWeight, &finalFreq);
     }
@@ -707,7 +714,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
                 / (10 * inputLength
                         - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
         if (DEBUG_DICT_FULL) {
-            LOGI("Demotion rate for missing character is %d.", demotionRate);
+            AKLOGI("Demotion rate for missing character is %d.", demotionRate);
         }
         multiplyRate(demotionRate, &finalFreq);
     }
@@ -721,7 +728,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
         multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
         if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
             if (DEBUG_CORRECTION_FREQ) {
-                LOGI("Double excessive demotion");
+                AKLOGI("Double excessive demotion");
             }
             // If an excessive character is not adjacent to the left char or the right char,
             // we will demote this word.
@@ -771,7 +778,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
         for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
             // A word with proximity corrections
             if (DEBUG_DICT_FULL) {
-                LOGI("Found a proximity correction.");
+                AKLOGI("Found a proximity correction.");
             }
             multiplyIntCapped(typedLetterMultiplier, &finalFreq);
             multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
@@ -787,7 +794,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
     // Promotion for an exactly matched word
     if (ed == 0) {
         // Full exact match
-        if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
+        if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
+                && quoteDiffCount == 0) {
             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
         }
     }
@@ -832,14 +840,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
     }
 
     if (DEBUG_DICT_FULL) {
-        LOGI("calc: %d, %d", outputIndex, sameLength);
+        AKLOGI("calc: %d, %d", outputLength, sameLength);
     }
 
     if (DEBUG_CORRECTION_FREQ) {
-        DUMP_WORD(correction->mWord, outputIndex + 1);
-        LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
-                skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
-                quoteDiffCount, ed, finalFreq);
+        DUMP_WORD(correction->mWord, outputLength);
+        AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
+                skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded,
+                sameLength, quoteDiffCount, ed, finalFreq);
     }
 
     return finalFreq;
@@ -878,7 +886,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
             firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
 
     if (DEBUG_DICT_FULL) {
-        LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+        AKLOGI("Two words: %c, %c, %d",
+                word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+    }
+
+    if (firstWordLength == 0 || secondWordLength == 0) {
+        return 0;
+    }
+    const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1);
+    int tempFirstFreq = firstFreq;
+    multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+    const int secondDemotionRate = 100
+            - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1);
+    int tempSecondFreq = secondFreq;
+    multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+    const int totalLength = firstWordLength + secondWordLength;
+
+    // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+    // length.
+    int totalFreq = tempFirstFreq + tempSecondFreq;
+
+    // This is a workaround to try offsetting the not-enough-demotion which will be done in
+    // calcNormalizedScore in Utils.java.
+    // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+    // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+    // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+    const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+    multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+    // At this moment, totalFreq is calculated by the following formula:
+    // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+    //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+    multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+    // This is another workaround to offset the demotion which will be done in
+    // calcNormalizedScore in Utils.java.
+    // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+    // the same amount because we already have adjusted the synthetic freq of this "missing or
+    // mistyped space" suggestion candidate above in this method.
+    const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+    multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+    if (isSpaceProximity) {
+        // A word pair with one space proximity correction
+        if (DEBUG_DICT) {
+            AKLOGI("Found a word pair with space proximity correction.");
+        }
+        multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+        multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+    }
+
+    multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+    if (capitalizedWordDemotion) {
+        multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+    }
+
+    return totalFreq;
+}
+
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld(
+        const int firstFreq, const int secondFreq, const Correction* correction,
+        const unsigned short *word) {
+    const int spaceProximityPos = correction->mSpaceProximityPos;
+    const int missingSpacePos = correction->mMissingSpacePos;
+    if (DEBUG_DICT) {
+        int inputCount = 0;
+        if (spaceProximityPos >= 0) ++inputCount;
+        if (missingSpacePos >= 0) ++inputCount;
+        assert(inputCount <= 1);
+    }
+    const bool isSpaceProximity = spaceProximityPos >= 0;
+    const int inputLength = correction->mInputLength;
+    const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+    const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+            : (inputLength - missingSpacePos);
+    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+    bool firstCapitalizedWordDemotion = false;
+    if (firstWordLength >= 2) {
+        firstCapitalizedWordDemotion = isUpperCase(word[0]);
+    }
+
+    bool secondCapitalizedWordDemotion = false;
+    if (secondWordLength >= 2) {
+        secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+    }
+
+    const bool capitalizedWordDemotion =
+            firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+    if (DEBUG_DICT_FULL) {
+        AKLOGI("Two words: %c, %c, %d",
+                word[0], word[firstWordLength + 1], capitalizedWordDemotion);
     }
 
     if (firstWordLength == 0 || secondWordLength == 0) {
@@ -923,7 +1027,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
     if (isSpaceProximity) {
         // A word pair with one space proximity correction
         if (DEBUG_DICT) {
-            LOGI("Found a word pair with space proximity correction.");
+            AKLOGI("Found a word pair with space proximity correction.");
         }
         multiplyIntCapped(typedLetterMultiplier, &totalFreq);
         multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
@@ -938,4 +1042,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
     return totalFreq;
 }
 
+/* Damerau-Levenshtein distance between before and after (adjacent transpositions allowed) */
+inline static int editDistanceInternal(
+        int* editDistanceTable, const unsigned short* before,
+        const int beforeLength, const unsigned short* after, const int afterLength) {
+    // dp is a flat li x lo table: dp[a][b] == dp[a * lo + b] (a chars of before, b of after)
+    int* dp = editDistanceTable;
+    const int li = beforeLength + 1;
+    const int lo = afterLength + 1;
+    for (int i = 0; i < li; ++i) {
+        dp[lo * i] = i; // first column: dp[i][0] = i
+    }
+    for (int i = 0; i < lo; ++i) {
+        dp[i] = i; // first row: dp[0][i] = i
+    }
+
+    for (int i = 0; i < li - 1; ++i) {
+        for (int j = 0; j < lo - 1; ++j) {
+            const uint32_t ci = toBaseLowerCase(before[i]);
+            const uint32_t co = toBaseLowerCase(after[j]);
+            const uint16_t cost = (ci == co) ? 0 : 1; // chars are compared via toBaseLowerCase()
+            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+            if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
+                    && co == toBaseLowerCase(before[i - 1])) {
+                dp[(i + 1) * lo + (j + 1)] = min(
+                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+            }
+        }
+    }
+
+    if (DEBUG_EDIT_DISTANCE) {
+        AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+        for (int i = 0; i < li; ++i) {
+            for (int j = 0; j < lo; ++j) {
+                AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+            }
+        }
+    }
+    return dp[li * lo - 1]; // last cell, i.e. dp[beforeLength][afterLength]
+}
+
+int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
+        const int beforeLength, const unsigned short* after, const int afterLength) {
+    int table[(beforeLength + 1) * (afterLength + 1)]; // NOTE(review): runtime-sized array (VLA)
+    return editDistanceInternal(table, before, beforeLength, after, afterLength);
+}
+
+
+// In dictionary.cpp, in the getSuggestion() method,
+// suggestion scores are computed using the formula below.
+// original score
+//  := pow(mTypedLetterMultiplier (which is defined as 2),
+//         (the number of matched characters between typed word and suggested word))
+//     * (the individual word's score, which is defined in the unigram dictionary;
+//         this score is in the range [0, 255].)
+// Then, the following processing is applied.
+//     - If the dictionary word is matched up to the point of the user entry
+//       (full match up to min(before.length(), after.length()))
+//       => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (which is defined as 1.2)
+//     - If the word is a true full match except for differences in accents or
+//       capitalization, then treat it as if the score was 255.
+//     - If before.length() == after.length()
+//       => multiply by mFullWordMultiplier (which is defined as 2)
+// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing by pow(2, min(b.l(),a.l())) * 255 * 2.
+
+/* static */
+double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
+        const int beforeLength, const unsigned short* after, const int afterLength,
+        const int score) {
+    if (0 == beforeLength || 0 == afterLength) {
+        return 0; // an empty word on either side scores 0
+    }
+    const int distance = editDistance(before, beforeLength, after, afterLength);
+    int spaceCount = 0;
+    for (int i = 0; i < afterLength; ++i) {
+        if (after[i] == CODE_SPACE) {
+            ++spaceCount;
+        }
+    }
+
+    if (spaceCount == afterLength) {
+        return 0; // after consists only of spaces
+    }
+
+    const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
+            * pow((double)TYPED_LETTER_MULTIPLIER,
+                    (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+
+    // Add a weight based on edit distance.
+    // NOTE(review): this assumes distance <= afterLength (i.e. beforeLength <= afterLength),
+    // so that 0 <= distance / afterLength <= 1 -- confirm against callers.
+    const double weight = 1.0 - (double) distance / afterLength;
+    return (score / maxScore) * weight;
+}
+
 } // namespace latinime