1 files changed, 101 insertions, 6 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 9016cae69..a55b2da96 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -32,6 +32,7 @@
 #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
 #include "utils/char_utils.h"
 #include "utils/jni_data_utils.h"
+#include "utils/log_utils.h"
 #include "utils/time_keeper.h"
 
 namespace latinime {
@@ -334,8 +335,9 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
     if (!shortcutTargetCodePoints.empty()) {
         shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
     }
+    // Use 1 for count to indicate the word has inputted.
     const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
-            probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
+            probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
     dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
 }
 
@@ -351,8 +353,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
     jsize word1Length = env->GetArrayLength(word1);
     int word1CodePoints[word1Length];
     env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
-    dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
-            word1Length, probability, timestamp);
+    const std::vector<int> bigramTargetCodePoints(
+            word1CodePoints, word1CodePoints + word1Length);
+    // Use 1 for count to indicate the bigram has inputted.
+    const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
+            timestamp, 0 /* level */, 1 /* count */);
+    dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty);
 }
 
 static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@@ -435,13 +441,18 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
                     env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
             shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
         }
+        // Use 1 for count to indicate the word has inputted.
         const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
-                unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts);
+                unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
         dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
         if (word0) {
             jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
-            dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
-                    bigramProbability, timestamp);
+            const std::vector<int> bigramTargetCodePoints(
+                    word1CodePoints, word1CodePoints + word1Length);
+            // Use 1 for count to indicate the bigram has inputted.
+            const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
+                    timestamp, 0 /* level */, 1 /* count */);
+            dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty);
         }
         if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
             return i + 1;
@@ -489,6 +500,85 @@ static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass claz
     return dictionary->getDictionaryStructurePolicy()->isCorrupted();
 }
 
+static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
+        DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
+        const char *const dictFilePath) {
+    structurePolicy->flushWithGC(dictFilePath);
+    structurePolicy.release();
+    return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
+            dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
+}
+
+static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
+        jstring dictFilePath, jlong newFormatVersion) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return false;
+    }
+    const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
+    char dictFilePathChars[filePathUtf8Length + 1];
+    env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
+    dictFilePathChars[filePathUtf8Length] = '\0';
+
+    const DictionaryHeaderStructurePolicy *const headerPolicy =
+            dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
+    DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
+            DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
+                    newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
+    if (!dictionaryStructureWithBufferPolicy) {
+        LogUtils::logToJava(env, "Cannot migrate header.");
+        return false;
+    }
+
+    // TODO: Migrate historical information.
+    int wordCodePoints[MAX_WORD_LENGTH];
+    int token = 0;
+    // Add unigrams.
+    do {
+        token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
+        const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
+        const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+        if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
+            dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
+                    std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
+            if (!dictionaryStructureWithBufferPolicy) {
+                LogUtils::logToJava(env, "Cannot open dict after GC.");
+                return false;
+            }
+        }
+        if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength,
+                wordProperty.getUnigramProperty())) {
+            LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
+            return false;
+        }
+    } while (token != 0);
+
+    // Add bigrams.
+    do {
+        token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
+        const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
+        const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+        if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
+            dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
+                    std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
+            if (!dictionaryStructureWithBufferPolicy) {
+                LogUtils::logToJava(env, "Cannot open dict after GC.");
+                return false;
+            }
+        }
+        for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
+            if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength,
+                    &bigramProperty)) {
+                LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
+                return false;
+            }
+        }
+    } while (token != 0);
+    // Save to File.
+    dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
+    return true;
+}
+
 static const JNINativeMethod sMethods[] = {
     {
         const_cast<char *>("openNative"),
@@ -591,6 +681,11 @@ static const JNINativeMethod sMethods[] = {
         const_cast<char *>("isCorruptedNative"),
         const_cast<char *>("(J)Z"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
+    },
+    {
+        const_cast<char *>("migrateNative"),
+        const_cast<char *>("(JLjava/lang/String;J)Z"),
+        reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
     }
 };