1 files changed, 122 insertions, 76 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 22ad2d0ab..f8dadb488 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -28,10 +28,11 @@
 #include "suggest/core/dictionary/property/unigram_property.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/result/suggestion_results.h"
-#include "suggest/core/session/prev_words_info.h"
+#include "suggest/core/session/ngram_context.h"
 #include "suggest/core/suggest_options.h"
 #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
 #include "utils/char_utils.h"
+#include "utils/int_array_view.h"
 #include "utils/jni_data_utils.h"
 #include "utils/log_utils.h"
 #include "utils/time_keeper.h"
@@ -179,9 +180,10 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
         jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
         jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
-        jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
-        jintArray outSpaceIndicesArray, jintArray outTypesArray,
-        jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
+        jint prevWordCount, jintArray outSuggestionCount, jintArray outCodePointsArray,
+        jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
+        jintArray outAutoCommitFirstWordConfidenceArray,
+        jfloatArray inOutWeightOfLangModelVsSpatialModel) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
     JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
@@ -236,42 +238,47 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
         ASSERT(false);
         return;
     }
-    float languageWeight;
-    env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
+    float weightOfLangModelVsSpatialModel;
+    env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
+            &weightOfLangModelVsSpatialModel);
     SuggestionResults suggestionResults(MAX_RESULTS);
-    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
-            prevWordCodePointArrays, isBeginningOfSentenceArray);
+    const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
     if (givenSuggestOptions.isGesture() || inputSize > 0) {
         // TODO: Use SuggestionResults to return suggestions.
         dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
-                times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
-                &givenSuggestOptions, languageWeight, &suggestionResults);
+                times, pointerIds, inputCodePoints, inputSize, &ngramContext,
+                &givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
     } else {
-        dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
+        dictionary->getPredictions(&ngramContext, &suggestionResults);
+    }
+    if (DEBUG_DICT) {
+        suggestionResults.dumpSuggestions();
     }
     suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
             outScoresArray, outSpaceIndicesArray, outTypesArray,
-            outAutoCommitFirstWordConfidenceArray, inOutLanguageWeight);
+            outAutoCommitFirstWordConfidenceArray, inOutWeightOfLangModelVsSpatialModel);
 }
 
 static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
         jintArray word) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return NOT_A_PROBABILITY;
-    const jsize wordLength = env->GetArrayLength(word);
-    int codePoints[wordLength];
-    env->GetIntArrayRegion(word, 0, wordLength, codePoints);
-    return dictionary->getProbability(codePoints, wordLength);
+    const jsize codePointCount = env->GetArrayLength(word);
+    int codePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
+    return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
 }
 
 static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
         JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return NOT_A_PROBABILITY;
-    const jsize wordLength = env->GetArrayLength(word);
-    int codePoints[wordLength];
-    env->GetIntArrayRegion(word, 0, wordLength, codePoints);
-    return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
+    const jsize codePointCount = env->GetArrayLength(word);
+    int codePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
+    return dictionary->getMaxProbabilityOfExactMatches(
+            CodePointArrayView(codePoints, codePointCount));
 }
 
 static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
@@ -282,9 +289,11 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
     const jsize wordLength = env->GetArrayLength(word);
     int wordCodePoints[wordLength];
     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
-    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
-            prevWordCodePointArrays, isBeginningOfSentenceArray);
-    return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength);
+    const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray,
+            env->GetArrayLength(prevWordCodePointArrays));
+    return dictionary->getNgramProbability(&ngramContext,
+            CodePointArrayView(wordCodePoints, wordLength));
 }
 
 // Method to iterate all words in the dictionary for makedict.
@@ -318,8 +327,9 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
 
 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
         jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
-        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
-        jobject outBigramProbabilityInfo, jobject outShortcutTargets,
+        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */,
+        jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets,
+        jobject outNgramProbabilityInfo, jobject outShortcutTargets,
         jobject outShortcutProbabilities) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return;
@@ -339,9 +349,10 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
             return;
         }
     }
-    const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
+    const WordProperty wordProperty = dictionary->getWordProperty(
+            CodePointArrayView(wordCodePoints, codePointCount));
     wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
-            outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
+            outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
             outShortcutProbabilities);
 }
 
@@ -357,15 +368,19 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
     int codePoints[codePointCount];
     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
-    std::vector<int> shortcutTargetCodePoints;
-    JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
-    if (!shortcutTargetCodePoints.empty()) {
-        shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
+    {
+        std::vector<int> shortcutTargetCodePoints;
+        JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
+        if (!shortcutTargetCodePoints.empty()) {
+            shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
+        }
     }
     // Use 1 for count to indicate the word has inputted.
     const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
-            isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
-    return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
+            isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
+            std::move(shortcuts));
+    return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
+            &unigramProperty);
 }
 
 static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
@@ -377,7 +392,7 @@ static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass cla
     jsize codePointCount = env->GetArrayLength(word);
     int codePoints[codePointCount];
     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
-    return dictionary->removeUnigramEntry(codePoints, codePointCount);
+    return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
 }
 
 static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@@ -387,17 +402,16 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
     if (!dictionary) {
         return false;
     }
-    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
-            prevWordCodePointArrays, isBeginningOfSentenceArray);
+    const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray,
+            env->GetArrayLength(prevWordCodePointArrays));
     jsize wordLength = env->GetArrayLength(word);
     int wordCodePoints[wordLength];
     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
-    const std::vector<int> bigramTargetCodePoints(
-            wordCodePoints, wordCodePoints + wordLength);
-    // Use 1 for count to indicate the bigram has inputted.
-    const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
-            timestamp, 0 /* level */, 1 /* count */);
-    return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
+    // Use 1 for count to indicate the ngram has inputted.
+    const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
+            probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
+    return dictionary->addNgramEntry(&ngramContext, &ngramProperty);
 }
 
 static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@@ -407,12 +421,34 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
     if (!dictionary) {
         return false;
     }
-    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
-            prevWordCodePointArrays, isBeginningOfSentenceArray);
-    jsize wordLength = env->GetArrayLength(word);
-    int wordCodePoints[wordLength];
-    env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
-    return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
+    const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray,
+            env->GetArrayLength(prevWordCodePointArrays));
+    jsize codePointCount = env->GetArrayLength(word);
+    int wordCodePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
+    return dictionary->removeNgramEntry(&ngramContext,
+            CodePointArrayView(wordCodePoints, codePointCount));
+}
+
+static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv *env,
+        jclass clazz, jlong dict, jobjectArray prevWordCodePointArrays,
+        jbooleanArray isBeginningOfSentenceArray, jintArray word, jboolean isValidWord, jint count,
+        jint timestamp) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return false;
+    }
+    const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray,
+            env->GetArrayLength(prevWordCodePointArrays));
+    jsize codePointCount = env->GetArrayLength(word);
+    int wordCodePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
+    const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
+    return dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
+            CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
+            historicalInfo);
 }
 
 // Returns how many language model params are processed.
@@ -472,28 +508,30 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
         jintArray shortcutTarget = static_cast<jintArray>(
                 env->GetObjectField(languageModelParam, shortcutTargetFieldId));
         std::vector<UnigramProperty::ShortcutProperty> shortcuts;
-        std::vector<int> shortcutTargetCodePoints;
-        JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
-        if (!shortcutTargetCodePoints.empty()) {
-            jint shortcutProbability =
-                    env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
-            shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
+        {
+            std::vector<int> shortcutTargetCodePoints;
+            JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
+            if (!shortcutTargetCodePoints.empty()) {
+                jint shortcutProbability =
+                        env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
+                shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
+            }
         }
         // Use 1 for count to indicate the word has inputted.
         const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
-                isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
-                &shortcuts);
-        dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
+                isBlacklisted, unigramProbability,
+                HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
+        dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
+                &unigramProperty);
         if (word0) {
             jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
-            const std::vector<int> bigramTargetCodePoints(
-                    word1CodePoints, word1CodePoints + word1Length);
             // Use 1 for count to indicate the bigram has inputted.
-            const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
-                    timestamp, 0 /* level */, 1 /* count */);
-            const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
+            const NgramProperty ngramProperty(
+                    CodePointArrayView(word1CodePoints, word1Length).toVector(),
+                    bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
+            const NgramContext ngramContext(word0CodePoints, word0Length,
                     false /* isBeginningOfSentence */);
-            dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
+            dictionary->addNgramEntry(&ngramContext, &ngramProperty);
         }
         if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
             return i + 1;
@@ -567,8 +605,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
     // Add unigrams.
     do {
         token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
-        const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
-                wordCodePointCount);
+        const WordProperty wordProperty = dictionary->getWordProperty(
+                CodePointArrayView(wordCodePoints, wordCodePointCount));
         if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
             // Skip beginning-of-sentence unigram.
             continue;
@@ -581,18 +619,20 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
                 return false;
             }
         }
-        if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
-                wordCodePointCount, wordProperty.getUnigramProperty())) {
+        if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
+                CodePointArrayView(wordCodePoints, wordCodePointCount),
+                wordProperty.getUnigramProperty())) {
             LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
             return false;
         }
     } while (token != 0);
 
     // Add bigrams.
+    // TODO: Support ngrams.
     do {
         token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
-        const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
-                wordCodePointCount);
+        const WordProperty wordProperty = dictionary->getWordProperty(
+                CodePointArrayView(wordCodePoints, wordCodePointCount));
         if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
             dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
                     std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
@@ -601,12 +641,12 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
                 return false;
             }
         }
-        const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
+        const NgramContext ngramContext(wordCodePoints, wordCodePointCount,
                 wordProperty.getUnigramProperty()->representsBeginningOfSentence());
-        for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
-            if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
-                    &bigramProperty)) {
-                LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
+        for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
+            if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext,
+                    &ngramProperty)) {
+                LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
                 return false;
             }
         }
@@ -659,7 +699,7 @@ static const JNINativeMethod sMethods[] = {
     },
     {
         const_cast<char *>("getSuggestionsNative"),
-        const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"),
+        const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[ZI[I[I[I[I[I[I[F)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
     },
     {
@@ -680,7 +720,8 @@ static const JNINativeMethod sMethods[] = {
     {
         const_cast<char *>("getWordPropertyNative"),
         const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
-                "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
+                "Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;"
+                "Ljava/util/ArrayList;)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
     },
     {
@@ -709,6 +750,11 @@ static const JNINativeMethod sMethods[] = {
         reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
     },
     {
+        const_cast<char *>("updateEntriesForWordWithNgramContextNative"),
+        const_cast<char *>("(J[[I[Z[IZII)Z"),
+        reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForWordWithNgramContext)
+    },
+    {
         const_cast<char *>("addMultipleDictionaryEntriesNative"),
         const_cast<char *>(
                 "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),