7 files changed, 115 insertions, 171 deletions
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
deleted file mode 100644
index 295e760d6..000000000
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2010, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define LOG_TAG "LatinIME: bigram_dictionary.cpp"
-
-#include "bigram_dictionary.h"
-
-#include <algorithm>
-#include <cstring>
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
-#include "suggest/core/dictionary/dictionary.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/result/suggestion_results.h"
-#include "suggest/core/session/prev_words_info.h"
-#include "utils/char_utils.h"
-
-namespace latinime {
-
-BigramDictionary::BigramDictionary(
-        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
-        : mDictionaryStructurePolicy(dictionaryStructurePolicy) {
-    if (DEBUG_DICT) {
-        AKLOGI("BigramDictionary - constructor");
-    }
-}
-
-BigramDictionary::~BigramDictionary() {
-}
-
-/* Parameters :
- * prevWordsInfo: Information of previous words to get the predictions.
- * outSuggestionResults: SuggestionResults to put the predictions.
- */
-void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
-        SuggestionResults *const outSuggestionResults) const {
-    int unigramProbability = 0;
-    int bigramCodePoints[MAX_WORD_LENGTH];
-    BinaryDictionaryBigramsIterator bigramsIt =
-            prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
-    while (bigramsIt.hasNext()) {
-        bigramsIt.next();
-        if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
-            continue;
-        }
-        const int codePointCount = mDictionaryStructurePolicy->
-                getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
-                        MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
-        if (codePointCount <= 0) {
-            continue;
-        }
-        // Due to space constraints, the probability for bigrams is approximate - the lower the
-        // unigram probability, the worse the precision. The theoritical maximum error in
-        // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
-        // in very bad cases. This means that sometimes, we'll see some bigrams interverted
-        // here, but it can't get too bad.
-        const int probability = mDictionaryStructurePolicy->getProbability(
-                unigramProbability, bigramsIt.getProbability());
-        outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
-    }
-}
-
-// Returns a pointer to the start of the bigram list.
-// If the word is not found or has no bigrams, this function returns NOT_A_DICT_POS.
-int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
-        const bool forceLowerCaseSearch) const {
-    if (0 >= prevWordLength) return NOT_A_DICT_POS;
-    int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
-            forceLowerCaseSearch);
-    if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
-    return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
-}
-
-int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
-        const int *word1, int length1) const {
-    int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
-            false /* forceLowerCaseSearch */);
-    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
-    BinaryDictionaryBigramsIterator bigramsIt =
-            prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
-    while (bigramsIt.hasNext()) {
-        bigramsIt.next();
-        if (bigramsIt.getBigramPos() == nextWordPos
-                && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
-            return mDictionaryStructurePolicy->getProbability(
-                    mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
-                    bigramsIt.getProbability());
-        }
-    }
-    return NOT_A_PROBABILITY;
-}
-
-// TODO: Move functions related to bigram to here
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
deleted file mode 100644
index bd3aed1bd..000000000
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_DICTIONARY_H
-#define LATINIME_BIGRAM_DICTIONARY_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class DictionaryStructureWithBufferPolicy;
-class PrevWordsInfo;
-class SuggestionResults;
-
-class BigramDictionary {
- public:
-    BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
-
-    void getPredictions(const PrevWordsInfo *const prevWordsInfo,
-            SuggestionResults *const outSuggestionResults) const;
-    int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
-            const int *word1, int length1) const;
-    ~BigramDictionary();
-
- private:
-    DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
-
-    int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
-            const bool forceLowerCaseSearch) const;
-
-    const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
-};
-} // namespace latinime
-#endif // LATINIME_BIGRAM_DICTIONARY_H
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 228260216..fb25f757c 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -23,6 +23,7 @@
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/core/result/suggestion_results.h"
 #include "suggest/core/session/dic_traverse_session.h"
+#include "suggest/core/session/prev_words_info.h"
 #include "suggest/core/suggest.h"
 #include "suggest/core/suggest_options.h"
 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
@@ -37,7 +38,6 @@ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
         dictionaryStructureWithBufferPolicy)
         : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
-          mBigramDictionary(mDictionaryStructureWithBufferPolicy.get()),
           mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
           mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
     logDictionaryInfo(env);
@@ -62,7 +62,29 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
 void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
         SuggestionResults *const outSuggestionResults) const {
     TimeKeeper::setCurrentTime();
-    mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
+    int unigramProbability = 0;  // Passed by address to the policy lookup below.
+    int bigramCodePoints[MAX_WORD_LENGTH];  // Buffer for each bigram target's code points.
+    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
+            mDictionaryStructureWithBufferPolicy.get());
+    while (bigramsIt.hasNext()) {
+        bigramsIt.next();
+        if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {  // Entry has no target position.
+            continue;
+        }
+        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+                && bigramsIt.getProbability() == NOT_A_PROBABILITY) {
+            continue;  // After a beginning-of-sentence, skip bigrams lacking a probability.
+        }
+        const int codePointCount = mDictionaryStructureWithBufferPolicy->
+                getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
+                        MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
+        if (codePointCount <= 0) {  // No code points were returned for the target.
+            continue;
+        }
+        const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
+                unigramProbability, bigramsIt.getProbability());
+        outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
+    }
 }
 
 int Dictionary::getProbability(const int *word, int length) const {
@@ -81,10 +103,24 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con
             mDictionaryStructureWithBufferPolicy.get(), word, length);
 }
 
-int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
+int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
         int length) const {
     TimeKeeper::setCurrentTime();
-    return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
+    int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
+            length, false /* forceLowerCaseSearch */);
+    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;  // No terminal position found.
+    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
+            mDictionaryStructureWithBufferPolicy.get());
+    while (bigramsIt.hasNext()) {
+        bigramsIt.next();
+        if (bigramsIt.getBigramPos() == nextWordPos  // Entry targets the word being queried...
+                && bigramsIt.getProbability() != NOT_A_PROBABILITY) {  // ...with a probability.
+            return mDictionaryStructureWithBufferPolicy->getProbability(
+                    mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode(
+                            nextWordPos), bigramsIt.getProbability());
+        }
+    }
+    return NOT_A_PROBABILITY;  // No entry with a probability targets the word.
 }
 
 bool Dictionary::addUnigramEntry(const int *const word, const int length,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 247ee2421..3b41088fe 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,7 +21,6 @@
 
 #include "defines.h"
 #include "jni.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@@ -75,7 +74,7 @@ class Dictionary {
 
     int getMaxProbabilityOfExactMatches(const int *word, int length) const;
 
-    int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
+    int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
             const int *word, int length) const;
 
     bool addUnigramEntry(const int *const codePoints, const int codePointCount,
@@ -119,7 +118,6 @@ class Dictionary {
 
     const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
             mDictionaryStructureWithBufferPolicy;
-    const BigramDictionary mBigramDictionary;
     const SuggestInterfacePtr mGestureSuggest;
     const SuggestInterfacePtr mTypingSuggest;
 
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index 640f6a2fc..e350c6996 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -25,7 +25,6 @@
 namespace latinime {
 
 // TODO: Support n-gram.
-// This class does not take ownership of any code point buffers.
 class PrevWordsInfo {
  public:
     // No prev word information.
@@ -33,21 +32,52 @@ class PrevWordsInfo {
         clear();
     }
 
+    PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) {  // Copies every slot from the source.
+        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+            mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
+            memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
+                    sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
+            mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
+        }
+    }
+
+    // Construct from previous words. Entries with an out-of-range length are left cleared.
+    PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+            const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
+            const size_t prevWordCount) {
+        clear();  // Start with zero counts and false flags in every slot.
+        for (size_t i = 0; i < std::min(NELEMS(mPrevWordCodePoints), prevWordCount); ++i) {
+            if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
+                continue;  // Leave this slot as cleared.
+            }
+            memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
+                    sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
+            mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
+            mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
+        }
+    }
+
+    // Construct from a previous word. A null buffer or out-of-range length leaves this cleared.
     PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
             const bool isBeginningOfSentence) {
         clear();
-        mPrevWordCodePoints[0] = prevWordCodePoints;
+        // A negative count would become a huge size_t in the memmove size computed below.
+        if (!prevWordCodePoints || prevWordCodePointCount < 0) return;
+        if (prevWordCodePointCount > MAX_WORD_LENGTH) return;
+        memmove(mPrevWordCodePoints[0], prevWordCodePoints,
+                sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
         mPrevWordCodePointCount[0] = prevWordCodePointCount;
         mIsBeginningOfSentence[0] = isBeginningOfSentence;
     }
 
     bool isValid() const {
-        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
-            if (mPrevWordCodePointCount[i] > MAX_WORD_LENGTH) {
-                return false;
-            }
+        if (mPrevWordCodePointCount[0] > 0) {  // The first previous word is non-empty.
+            return true;
+        }
+        if (mIsBeginningOfSentence[0]) {  // Or the first slot marks a beginning of sentence.
+            return true;
         }
-        return true;
+        return false;  // Neither a first previous word nor a beginning-of-sentence flag.
     }
 
     void getPrevWordsTerminalPtNodePos(
@@ -168,13 +198,12 @@ class PrevWordsInfo {
 
     void clear() {
         for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
-            mPrevWordCodePoints[i] = nullptr;
             mPrevWordCodePointCount[i] = 0;
             mIsBeginningOfSentence[i] = false;
         }
     }
 
-    const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
     int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
     bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
 };
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 5c62b9caf..002593c49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -268,6 +268,10 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
 
 int PatriciaTriePolicy::getProbability(const int unigramProbability,
         const int bigramProbability) const {
+    // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
+    // probability, the worse the precision. The theoretical maximum error in resulting probability
+    // is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means
+    // that sometimes, we'll see some bigrams inverted here, but it can't get too bad.
     if (unigramProbability == NOT_A_PROBABILITY) {
         return NOT_A_PROBABILITY;
     } else if (bigramProbability == NOT_A_PROBABILITY) {
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 3514aeeb0..cb82d3c3b 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -21,6 +21,7 @@
 
 #include "defines.h"
 #include "jni.h"
+#include "suggest/core/session/prev_words_info.h"
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
 #include "utils/char_utils.h"
@@ -95,6 +96,37 @@ class JniDataUtils {
         }
     }
 
+    static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
+            jbooleanArray isBeginningOfSentenceArray) {
+        int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+        int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+        bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+        jsize prevWordsCount = env->GetArrayLength(prevWordCodePointArrays);
+        for (size_t i = 0; i < NELEMS(prevWordCodePoints); ++i) {
+            prevWordCodePointCount[i] = 0;  // Defaults: empty word, not a beginning of sentence.
+            isBeginningOfSentence[i] = false;
+            if (prevWordsCount <= static_cast<int>(i)) {  // The Java array has no element i.
+                continue;
+            }
+            jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
+            if (!prevWord) {  // Null element: keep the defaults.
+                continue;
+            }
+            jsize prevWordLength = env->GetArrayLength(prevWord);
+            if (prevWordLength > MAX_WORD_LENGTH) {  // Too long for the buffer: keep defaults.
+                continue;
+            }
+            env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
+            prevWordCodePointCount[i] = prevWordLength;
+            jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
+            env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
+                    &isBeginningOfSentenceBoolean);  // NOTE(review): flag array length unchecked.
+            isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
+        }
+        return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
+                MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+    }
+
     static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
             const jboolean value) {
         env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);