diff options
Diffstat (limited to 'native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp')
-rw-r--r-- | native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 107 |
1 files changed, 101 insertions, 6 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 9016cae69..a55b2da96 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -32,6 +32,7 @@ #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "utils/char_utils.h" #include "utils/jni_data_utils.h" +#include "utils/log_utils.h" #include "utils/time_keeper.h" namespace latinime { @@ -334,8 +335,9 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, if (!shortcutTargetCodePoints.empty()) { shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } + // Use 1 for count to indicate the word has inputted. const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, - probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts); + probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty); } @@ -351,8 +353,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jsize word1Length = env->GetArrayLength(word1); int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, - word1Length, probability, timestamp); + const std::vector<int> bigramTargetCodePoints( + word1CodePoints, word1CodePoints + word1Length); + // Use 1 for count to indicate the bigram has inputted. + const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, + timestamp, 0 /* level */, 1 /* count */); + dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -435,13 +441,18 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j env->GetIntField(languageModelParam, shortcutProbabilityFieldId); shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); } + // Use 1 for count to indicate the word has inputted. const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, - unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts); + unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty); if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); - dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length, - bigramProbability, timestamp); + const std::vector<int> bigramTargetCodePoints( + word1CodePoints, word1CodePoints + word1Length); + // Use 1 for count to indicate the bigram has inputted. + const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability, + timestamp, 0 /* level */, 1 /* count */); + dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty); } if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { return i + 1; @@ -489,6 +500,85 @@ static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass claz return dictionary->getDictionaryStructurePolicy()->isCorrupted(); } +static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy( + DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy, + const char *const dictFilePath) { + structurePolicy->flushWithGC(dictFilePath); + structurePolicy.release(); + return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile( + dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */); +} + +static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict, + jstring dictFilePath, jlong newFormatVersion) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return false; + } + const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath); + char dictFilePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars); + dictFilePathChars[filePathUtf8Length] = '\0'; + + const DictionaryHeaderStructurePolicy *const headerPolicy = + dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = + DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict( + newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap()); + if (!dictionaryStructureWithBufferPolicy) { + LogUtils::logToJava(env, "Cannot migrate header."); + return false; + } + + // TODO: Migrate historical information. + int wordCodePoints[MAX_WORD_LENGTH]; + int token = 0; + // Add unigrams. + do { + token = dictionary->getNextWordAndNextToken(token, wordCodePoints); + const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints); + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { + dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( + std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); + if (!dictionaryStructureWithBufferPolicy) { + LogUtils::logToJava(env, "Cannot open dict after GC."); + return false; + } + } + if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength, + wordProperty.getUnigramProperty())) { + LogUtils::logToJava(env, "Cannot add unigram to the new dict."); + return false; + } + } while (token != 0); + + // Add bigrams. + do { + token = dictionary->getNextWordAndNextToken(token, wordCodePoints); + const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints); + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { + dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( + std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); + if (!dictionaryStructureWithBufferPolicy) { + LogUtils::logToJava(env, "Cannot open dict after GC."); + return false; + } + } + for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { + if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength, + &bigramProperty)) { + LogUtils::logToJava(env, "Cannot add bigram to the new dict."); + return false; + } + } + } while (token != 0); + // Save to File. + dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars); + return true; +} + static const JNINativeMethod sMethods[] = { { const_cast<char *>("openNative"), @@ -591,6 +681,11 @@ static const JNINativeMethod sMethods[] = { const_cast<char *>("isCorruptedNative"), const_cast<char *>("(J)Z"), reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative) + }, + { + const_cast<char *>("migrateNative"), + const_cast<char *>("(JLjava/lang/String;J)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative) } }; |