1 files changed, 71 insertions, 109 deletions
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index e3296f12a..290e9f997 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -153,6 +153,13 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
 
     if (DEBUG_DICT) {
         LOGI("Returning %d words", suggestedWordsCount);
+        // Print the returned words (NUL-terminated copy, at most MAX_WORD_LENGTH chars each)
+        for (int j = 0; j < suggestedWordsCount; ++j) {
+            short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
+            char s[MAX_WORD_LENGTH + 1];  // +1 so the last slot can always hold the terminator
+            for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = (i < MAX_WORD_LENGTH) ? w[i] : 0;
+            LOGI("%s %i", s, mFrequencies[j]);
+        }
         LOGI("Next letters: ");
         for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
             if (mNextLettersFrequency[k] > 0) {
@@ -322,16 +329,6 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
     return false;
 }
 
-inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos,
-        int depth, int finalFreq) {
-    // TODO: actually add alternates when the format supports it.
-}
-
-static inline bool hasAlternateSpellings(uint8_t flags) {
-    // TODO: when the format supports it, return the actual value.
-    return false;
-}
-
 static inline unsigned short toBaseLowerCase(unsigned short c) {
     if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
         c = BASE_CHARS[c];
@@ -372,7 +369,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
         assert(missingPos < mInputLength);
     }
     int rootPosition = ROOT_POS;
-    // Get the number of child of root, then increment the position
+    // Get the number of children of root, then increment the position
     int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
     int depth = 0;
 
@@ -657,22 +654,19 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId
 }
 
 inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
-        const uint8_t* const root, const uint8_t flags, int pos,
+        const uint8_t* const root, const uint8_t flags, const int pos,
         const int inputIndex, const int matchWeight, const int skipPos,
         const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
         int* nextLetters, const int nextLettersSize) {
 
     const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false;
-    const bool hasAlternates = hasAlternateSpellings(flags);
-    if (isSameAsTyped && !hasAlternates) return;
+    if (isSameAsTyped) return;
 
     if (depth >= MIN_SUGGEST_DEPTH) {
         const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
                 excessivePos, transposedPos, freq, sameLength);
         if (!isSameAsTyped)
             addWord(word, depth + 1, finalFreq);
-        if (hasAlternates)
-            addWordAlternatesSpellings(DICT_ROOT, pos, flags, finalFreq);
     }
 
     if (sameLength && depth >= mInputLength && skipPos < 0) {
@@ -680,6 +674,47 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de
     }
 }
 
+// Builds a "first second" suggestion from two sub-ranges of the input. Returns false when the
+// ranges are invalid or either lookup yields a frequency <= 0; otherwise adds it via addWord().
+bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
+        const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
+        const int secondWordLength, const bool isSpaceProximity) {
+    if (inputLength >= MAX_WORD_LENGTH) return false;
+    // The last clause keeps the first range from running into the second one, which in turn
+    // bounds newWordLength (and the stack array below) by inputLength + 1.
+    if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
+            || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength
+            || firstWordStartPos + firstWordLength > secondWordStartPos)
+        return false;
+    const int newWordLength = firstWordLength + secondWordLength + 1;
+    // Allocating variable length array on stack
+    unsigned short word[newWordLength];
+    // NOTE(review): assumes getMostFrequentWordLike() writes its match to the start of mWord.
+    const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);
+    if (DEBUG_DICT) {
+        LOGI("First freq: %d", firstFreq);
+    }
+    if (firstFreq <= 0) return false;
+    for (int i = 0; i < firstWordLength; ++i) word[i] = mWord[i];
+
+    const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);
+    if (DEBUG_DICT) {
+        LOGI("Second  freq:  %d", secondFreq);
+    }
+    if (secondFreq <= 0) return false;
+    word[firstWordLength] = SPACE;
+    for (int i = 0; i < secondWordLength; ++i) word[firstWordLength + 1 + i] = mWord[i];
+
+    const int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
+            secondWordLength, firstFreq, secondFreq, isSpaceProximity);
+    if (DEBUG_DICT) {
+        LOGI("Split two words:  %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
+                TYPED_LETTER_MULTIPLIER);
+    }
+    addWord(word, newWordLength, pairFreq);
+    return true;
+}
+
 #ifndef NEW_DICTIONARY_FORMAT
 // TODO: Don't forget to bring inline functions back to over where they are used.
 
@@ -725,8 +760,8 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
     }
 }
 
-inline int UnigramDictionary::getBestWordFreq(const int startInputIndex, const int inputLength,
-        unsigned short *word) {
+inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
+        const int inputLength, unsigned short *word) {
     int pos = ROOT_POS;
     int count = Dictionary::getCount(DICT_ROOT, &pos);
     int maxFreq = 0;
@@ -860,52 +895,10 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
     return NOT_VALID_WORD;
 }
 
-
 // The following functions will be modified.
-bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
-        const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
-        const int secondWordLength, const bool isSpaceProximity) {
-    if (inputLength >= MAX_WORD_LENGTH) return false;
-    if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
-            || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
-        return false;
-    const int newWordLength = firstWordLength + secondWordLength + 1;
-    // Allocating variable length array on stack
-    unsigned short word[newWordLength];
-    const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord);
-    if (DEBUG_DICT) {
-        LOGI("First freq: %d", firstFreq);
-    }
-    if (firstFreq <= 0) return false;
-
-    for (int i = 0; i < firstWordLength; ++i) {
-        word[i] = mWord[i];
-    }
-
-    const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord);
-    if (DEBUG_DICT) {
-        LOGI("Second  freq:  %d", secondFreq);
-    }
-    if (secondFreq <= 0) return false;
-
-    word[firstWordLength] = SPACE;
-    for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
-        word[i] = mWord[i - firstWordLength - 1];
-    }
-
-    int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
-            secondWordLength, firstFreq, secondFreq, isSpaceProximity);
-    if (DEBUG_DICT) {
-        LOGI("Split two words:  %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
-                TYPED_LETTER_MULTIPLIER);
-    }
-    addWord(word, newWordLength, pairFreq);
-    return true;
-}
-
-inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
-        const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
+        const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
+        const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
         int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
         bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
         int *nextSiblingPosition, int *nextOutputIndex) {
@@ -922,6 +915,11 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
     int freq;
     bool isSameAsUserTypedLength = false;
 
+    const int pos = initialPos;
+    const int depth = initialDepth;
+    const bool traverseAllNodes = initialTraverseAllNodes;  // keep bool, as the parameter is
+    const int diffs = initialDiffs;
+
     const uint8_t flags = 0; // No flags for now
 
     if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
@@ -993,53 +991,12 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
 
 #else // NEW_DICTIONARY_FORMAT
 
-bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
-        const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
-        const int secondWordLength, const bool isSpaceProximity) {
-    if (inputLength >= MAX_WORD_LENGTH) return false;
-    if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
-            || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
-        return false;
-    const int newWordLength = firstWordLength + secondWordLength + 1;
-    // Allocating variable length array on stack
-    unsigned short word[newWordLength];
-    const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord);
-    if (DEBUG_DICT) {
-        LOGI("First freq: %d", firstFreq);
-    }
-    if (firstFreq <= 0) return false;
-
-    for (int i = 0; i < firstWordLength; ++i) {
-        word[i] = mWord[i];
-    }
-
-    const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord);
-    if (DEBUG_DICT) {
-        LOGI("Second  freq:  %d", secondFreq);
-    }
-    if (secondFreq <= 0) return false;
-
-    word[firstWordLength] = SPACE;
-    for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
-        word[i] = mWord[i - firstWordLength - 1];
-    }
-
-    int pairFreq = calcFreqForSplitTwoWords(TYPED_LETTER_MULTIPLIER, firstWordLength,
-            secondWordLength, firstFreq, secondFreq, isSpaceProximity);
-    if (DEBUG_DICT) {
-        LOGI("Split two words:  %d, %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength,
-                TYPED_LETTER_MULTIPLIER);
-    }
-    addWord(word, newWordLength, pairFreq);
-    return true;
-}
-
-inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
-        const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
-        const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
+inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth,
+        const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex,
+        const int initialDiffs, const int skipPos, const int excessivePos, const int transposedPos,
         int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
         bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
-        int *nextSiblingPosition, int *nextOutputIndex) {
+        int *nextSiblingPosition, int *newOutputIndex) {
     if (DEBUG_DICT) {
         int inputCount = 0;
         if (skipPos >= 0) ++inputCount;
@@ -1053,13 +1010,18 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
     int freq;
     bool isSameAsUserTypedLength = false;
 
+    int pos = initialPos;
+    int depth = initialDepth;
+    bool traverseAllNodes = initialTraverseAllNodes;  // keep bool, as the parameter is
+    int diffs = initialDiffs;
+
     const uint8_t flags = 0; // No flags for now
 
     if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
 
     *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
             &c, &childPosition, &terminal, &freq);
-    *nextOutputIndex = depth + 1;
+    *newOutputIndex = depth + 1;
 
     const bool needsToTraverseChildrenNodes = childPosition != 0;