5 files changed, 154 insertions, 67 deletions
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 6a129d4e3..63dd283c8 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -39,15 +39,15 @@ inline static void initEditDistance(int *editDistanceTable) {
     }
 }
 
-inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
-        const int outputLength) {
+inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
+        const int editDistanceTableWidth, const int outputLength) {
     if (DEBUG_DICT) {
         AKLOGI("EditDistanceTable");
         for (int i = 0; i <= 10; ++i) {
             int c[11];
             for (int j = 0; j <= 10; ++j) {
-                if (j < inputLength + 1 && i < outputLength + 1) {
-                    c[j] = (editDistanceTable + i * (inputLength + 1))[j];
+                if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
+                    c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
                 } else {
                     c[j] = -1;
                 }
@@ -81,12 +81,12 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
     }
 }
 
-inline static int getCurrentEditDistance(
-        int *editDistanceTable, const int inputLength, const int outputLength) {
+inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
+        const int outputLength, const int inputLength) {
     if (DEBUG_EDIT_DISTANCE) {
         AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
     }
-    return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
+    return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
 }
 
 //////////////////////
@@ -165,6 +165,16 @@ int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFre
 }
 
 int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
+    return getFinalFreqInternal(freq, word, wordLength, mInputLength);
+}
+
+int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength,
+        const int inputLength) {
+    return getFinalFreqInternal(freq, word, wordLength, inputLength);
+}
+
+int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength,
+        const int inputLength) {
     const int outputIndex = mTerminalOutputIndex;
     const int inputIndex = mTerminalInputIndex;
     *wordLength = outputIndex + 1;
@@ -173,8 +183,9 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
     }
 
     *word = mWord;
-    return Correction::RankingAlgorithm::calculateFinalFreq(
-            inputIndex, outputIndex, freq, mEditDistanceTable, this);
+    int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq(
+            inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength);
+    return finalFreq;
 }
 
 bool Correction::initProcessState(const int outputIndex) {
@@ -613,9 +624,9 @@ inline static bool isUpperCase(unsigned short c) {
 
 /* static */
 int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
-        const int freq, int* editDistanceTable, const Correction* correction) {
+        const int freq, int* editDistanceTable, const Correction* correction,
+        const int inputLength) {
     const int excessivePos = correction->getExcessivePos();
-    const int inputLength = correction->mInputLength;
     const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
     const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
     const ProximityInfo *proximityInfo = correction->mProximityInfo;
@@ -640,13 +651,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
     const unsigned short* word = correction->mWord;
     const bool skipped = skippedCount > 0;
 
-    const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
+    const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
             - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
 
     // TODO: Calculate edit distance for transposed and excessive
     int ed = 0;
     if (DEBUG_DICT_FULL) {
-        dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
+        dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
     }
     int adjustedProximityMatchedCount = proximityMatchedCount;
 
@@ -654,22 +665,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
 
     // TODO: Optimize this.
     if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
-        ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1)
-                - transposedCount;
+        ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
+                inputLength) - transposedCount;
 
         const int matchWeight = powerIntCapped(typedLetterMultiplier,
-                max(inputLength, outputIndex + 1) - ed);
+                max(inputLength, outputLength) - ed);
         multiplyIntCapped(matchWeight, &finalFreq);
 
         // TODO: Demote further if there are two or more excessive chars with longer user input?
-        if (inputLength > outputIndex + 1) {
+        if (inputLength > outputLength) {
             multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
         }
 
         ed = max(0, ed - quoteDiffCount);
 
         if (transposedCount < 1) {
-            if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
+            if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
                 // Promote a word with just one skipped or excessive char
                 if (sameLength) {
                     multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
@@ -681,7 +692,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
                 sameLength = true;
             }
         }
-        adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
+        adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
                 proximityMatchedCount);
     } else {
         const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
@@ -783,7 +794,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
     // Promotion for an exactly matched word
     if (ed == 0) {
         // Full exact match
-        if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
+        if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
+                && quoteDiffCount == 0) {
             finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
         }
     }
@@ -828,14 +840,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
     }
 
     if (DEBUG_DICT_FULL) {
-        AKLOGI("calc: %d, %d", outputIndex, sameLength);
+        AKLOGI("calc: %d, %d", outputLength, sameLength);
     }
 
     if (DEBUG_CORRECTION_FREQ) {
-        DUMP_WORD(correction->mWord, outputIndex + 1);
-        AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
-                skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
-                quoteDiffCount, ed, finalFreq);
+        DUMP_WORD(correction->mWord, outputLength);
+        AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
+                skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded,
+                sameLength, quoteDiffCount, ed, finalFreq);
     }
 
     return finalFreq;
@@ -881,11 +893,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
     if (firstWordLength == 0 || secondWordLength == 0) {
         return 0;
     }
-    const int firstDemotionRate = 100 - 100 / (firstWordLength + 1);
+    const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1);
     int tempFirstFreq = firstFreq;
     multiplyRate(firstDemotionRate, &tempFirstFreq);
 
-    const int secondDemotionRate = 100 - 100 / (secondWordLength + 1);
+    const int secondDemotionRate = 100
+            - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1);
     int tempSecondFreq = secondFreq;
     multiplyRate(secondDemotionRate, &tempSecondFreq);
 
diff --git a/native/src/correction.h b/native/src/correction.h
index 22a424f5c..0715551d0 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -75,6 +75,8 @@ class Correction {
     int getFreqForSplitTwoWords(
             const int firstFreq, const int secondFreq, const unsigned short *word);
     int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
+    int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
+            const int inputLength);
 
     CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
 
@@ -97,7 +99,8 @@ class Correction {
     class RankingAlgorithm {
      public:
         static int calculateFinalFreq(const int inputIndex, const int depth,
-                const int freq, int *editDistanceTable, const Correction* correction);
+                const int freq, int *editDistanceTable, const Correction* correction,
+                const int inputLength);
         static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
                 const Correction* correction, const unsigned short *word);
         static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq,
@@ -122,6 +125,8 @@ class Correction {
             const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
     inline CorrectionType processUnrelatedCorrectionType();
     inline void addCharToCurrentWord(const int32_t c);
+    inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength,
+            const int inputLength);
 
     const int TYPED_LETTER_MULTIPLIER;
     const int FULL_WORD_MULTIPLIER;
diff --git a/native/src/defines.h b/native/src/defines.h
index d739043a4..119a7d779 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -187,7 +187,7 @@ static void prof_out(void) {
 // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
-#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67
+#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58
 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
@@ -199,6 +199,8 @@ static void prof_out(void) {
 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
+#define TWO_WORDS_CORRECTION_DEMOTION_BASE 80
+#define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1
 #define ZERO_DISTANCE_PROMOTION_RATE 110
 #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
 #define HALF_SCORE_SQUARED_RADIUS 32.0f
@@ -212,8 +214,10 @@ static void prof_out(void) {
 // Holds up to 1 candidate for each word
 #define SUB_QUEUE_MAX_WORDS 1
 #define SUB_QUEUE_MAX_COUNT 10
+#define SUB_QUEUE_MIN_WORD_LENGTH 4
 
-#define TWO_WORDS_CORRECTION_THRESHOLD 0.22f
+#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
+#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22
 
 #define MAX_DEPTH_MULTIPLIER 3
 
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8be95bc40..2c5b9402a 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -254,7 +254,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
                         proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
                 ns += 0;
                 AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
-                        (ns > TWO_WORDS_CORRECTION_THRESHOLD));
+                        (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
                 DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
                 DUMP_WORD(word, wordLength);
             }
@@ -343,43 +343,45 @@ inline void UnigramDictionary::onTerminal(const int freq,
         WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
     const int inputIndex = correction->getInputIndex();
     const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
-    if (!addToMasterQueue && !addToSubQueue) {
-        return;
-    }
-    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
-    WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
+
     int wordLength;
     unsigned short* wordPointer;
-    const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
-    if (finalFreq != NOT_A_FREQUENCY) {
-        if (!terminalAttributes.isShortcutOnly()) {
-            if (addToMasterQueue) {
+
+    if (addToMasterQueue) {
+        WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+        const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
+        if (finalFreq != NOT_A_FREQUENCY) {
+            if (!terminalAttributes.isShortcutOnly()) {
                 addWord(wordPointer, wordLength, finalFreq, masterQueue);
             }
-            // TODO: Check the validity of "inputIndex == wordLength"
-            //if (addToSubQueue && inputIndex == wordLength) {
-            if (addToSubQueue) {
-                addWord(wordPointer, wordLength, finalFreq, subQueue);
+
+            // Please note that the shortcut candidates will be added to the master queue only.
+            TerminalAttributes::ShortcutIterator iterator =
+                    terminalAttributes.getShortcutIterator();
+            while (iterator.hasNextShortcutTarget()) {
+                // TODO: addWord only supports weak ordering, meaning we have no means
+                // to control the order of the shortcuts relative to one another or to the word.
+                // We need to either modulate the frequency of each shortcut according
+                // to its own shortcut frequency or to make the queue
+                // so that the insert order is protected inside the queue for words
+                // with the same score.
+                uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+                const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+                        MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+                addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
             }
         }
-        // Please note that the shortcut candidates will be added to the master queue only.
-        if (!addToMasterQueue) {
-            return;
-        }
+    }
 
-        // From here, below is the code to add shortcut candidates.
-        TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
-        while (iterator.hasNextShortcutTarget()) {
-            // TODO: addWord only supports weak ordering, meaning we have no means to control the
-            // order of the shortcuts relative to one another or to the word. We need to either
-            // modulate the frequency of each shortcut according to its own shortcut frequency or
-            // to make the queue so that the insert order is protected inside the queue for words
-            // with the same score.
-            uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
-            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
-                    MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
-            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
-        }
+    // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
+    // or more length.
+    if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
+        // TODO: Check the validity of "inputIndex == wordLength"
+        //if (addToSubQueue && inputIndex == wordLength) {
+        WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
+        const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
+                inputIndex);
+        addWord(wordPointer, wordLength, finalFreq, subQueue);
     }
 }
 
@@ -397,20 +399,57 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
     }
     const bool isSpaceProximity = spaceProximityPos >= 0;
     const int firstWordStartPos = 0;
+
+    const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+    int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord);
+    unsigned short* firstWord = 0;
+    int firstWordLength = 0;
+    if (firstFreq > 0) {
+        firstWordLength = firstTypedWordLength;
+        firstWord = mWord;
+    } else {
+        if (masterQueue->size() > 0) {
+            double nsForMaster = masterQueue->getHighestNormalizedScore(
+                    proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
+            if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) {
+                // Do nothing if the highest suggestion exceeds the threshold.
+                return;
+            }
+        }
+        WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength);
+        if (firstWordQueue->size() < 1) {
+            return;
+        }
+        int score = 0;
+        const double ns = firstWordQueue->getHighestNormalizedScore(
+                proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score,
+                &firstWordLength);
+        // Two words correction won't be done if the score of the first word doesn't exceed the
+        // threshold.
+        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) {
+            return;
+        }
+        firstFreq = score >> (firstWordLength
+                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
+    }
+
+    if (firstFreq <= 0) {
+        return;
+    }
+
     const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
-    const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
     const int secondWordLength = isSpaceProximity
             ? (inputLength - spaceProximityPos - 1)
             : (inputLength - missingSpacePos);
 
     if (inputLength >= MAX_WORD_LENGTH) return;
+
     if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
             || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
         return;
 
     const int newWordLength = firstWordLength + secondWordLength + 1;
 
-
     // Space proximity preparation
     //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
     //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
@@ -420,15 +459,12 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
 
     // Allocating variable length array on stack
     unsigned short word[newWordLength];
-    const int firstFreq = getMostFrequentWordLike(
-            firstWordStartPos, firstWordLength, proximityInfo, mWord);
     if (DEBUG_DICT) {
         AKLOGI("First freq: %d", firstFreq);
     }
-    if (firstFreq <= 0) return;
 
     for (int i = 0; i < firstWordLength; ++i) {
-        word[i] = mWord[i];
+        word[i] = firstWord[i];
     }
 
     const int secondFreq = getMostFrequentWordLike(
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
index 6262439b5..c85f2b9b3 100644
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@@ -47,6 +47,7 @@ class WordsPriorityQueue {
         for (int i = 0; i < maxWordLength; ++i) {
             mSuggestedWords[i].mUsed = false;
         }
+        mHighestSuggestedWord = 0;
     }
 
     ~WordsPriorityQueue() {
@@ -79,6 +80,9 @@ class WordsPriorityQueue {
             DUMP_WORD(word, wordLength);
         }
         mSuggestions.push(sw);
+        if (!mHighestSuggestedWord || mHighestSuggestedWord->mScore < sw->mScore) {
+            mHighestSuggestedWord = sw;
+        }
     }
 
     SuggestedWord* top() {
@@ -88,6 +92,7 @@ class WordsPriorityQueue {
     }
 
     int outputSuggestions(int *frequencies, unsigned short *outputChars) {
+        mHighestSuggestedWord = 0;
         const unsigned int size = min(MAX_WORDS, mSuggestions.size());
         int index = size - 1;
         while (!mSuggestions.empty() && index >= 0) {
@@ -116,6 +121,7 @@ class WordsPriorityQueue {
     }
 
     void clear() {
+        mHighestSuggestedWord = 0;
         while (!mSuggestions.empty()) {
             SuggestedWord* sw = mSuggestions.top();
             if (DEBUG_WORDS_PRIORITY_QUEUE) {
@@ -134,6 +140,28 @@ class WordsPriorityQueue {
         DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
     }
 
+    double getHighestNormalizedScore(const unsigned short* before, const int beforeLength,
+            unsigned short** outWord, int *outScore, int *outLength) {
+        if (!mHighestSuggestedWord) {
+            return 0.0;
+        }
+        SuggestedWord* sw = mHighestSuggestedWord;
+        const int score = sw->mScore;
+        unsigned short* word = sw->mWord;
+        const int wordLength = sw->mWordLength;
+        if (outScore) {
+            *outScore = score;
+        }
+        if (outWord) {
+            *outWord = word;
+        }
+        if (outLength) {
+            *outLength = wordLength;
+        }
+        return Correction::RankingAlgorithm::calcNormalizedScore(
+                before, beforeLength, word, wordLength, score);
+    }
+
  private:
     struct wordComparator {
         bool operator ()(SuggestedWord * left, SuggestedWord * right) {
@@ -158,6 +186,7 @@ class WordsPriorityQueue {
     const unsigned int MAX_WORDS;
     const unsigned int MAX_WORD_LENGTH;
     SuggestedWord* mSuggestedWords;
+    SuggestedWord* mHighestSuggestedWord;
 };
 }