diff options
Diffstat (limited to 'native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp')
-rw-r--r-- | native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 428 |
1 files changed, 296 insertions, 132 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 8f21c50ec..9016cae69 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -19,61 +19,25 @@ #include "com_android_inputmethod_latin_BinaryDictionary.h" #include <cstring> // for memset() +#include <vector> #include "defines.h" #include "jni.h" #include "jni_common.h" #include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/dictionary/property/unigram_property.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "suggest/core/result/suggestion_results.h" #include "suggest/core/suggest_options.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" -#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" -#include "utils/autocorrection_threshold_utils.h" +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" +#include "utils/char_utils.h" +#include "utils/jni_data_utils.h" +#include "utils/time_keeper.h" namespace latinime { class ProximityInfo; -// TODO: Move to makedict. -static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz, - jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray, - jobjectArray attributeValueStringArray) { - const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); - char filePathChars[filePathUtf8Length + 1]; - env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); - filePathChars[filePathUtf8Length] = '\0'; - - const int keyCount = env->GetArrayLength(attributeKeyStringArray); - const int valueCount = env->GetArrayLength(attributeValueStringArray); - if (keyCount != valueCount) { - return false; - } - - HeaderReadWriteUtils::AttributeMap attributeMap; - for (int i = 0; i < keyCount; i++) { - jstring keyString = static_cast<jstring>( - env->GetObjectArrayElement(attributeKeyStringArray, i)); - const jsize keyUtf8Length = env->GetStringUTFLength(keyString); - char keyChars[keyUtf8Length + 1]; - env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); - keyChars[keyUtf8Length] = '\0'; - HeaderReadWriteUtils::AttributeMap::key_type key; - HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); - - jstring valueString = static_cast<jstring>( - env->GetObjectArrayElement(attributeValueStringArray, i)); - const jsize valueUtf8Length = env->GetStringUTFLength(valueString); - char valueChars[valueUtf8Length + 1]; - env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); - valueChars[valueUtf8Length] = '\0'; - HeaderReadWriteUtils::AttributeMap::mapped_type value; - HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); - attributeMap[key] = value; - } - - return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion), - &attributeMap); -} - static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { PROF_OPEN; @@ -86,20 +50,49 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy = - DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( + DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy( + DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile( sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), - isUpdatable == JNI_TRUE); + isUpdatable == JNI_TRUE)); if (!dictionaryStructureWithBufferPolicy) { return 0; } - Dictionary *const dictionary = new Dictionary(env, dictionaryStructureWithBufferPolicy); + Dictionary *const dictionary = + new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); PROF_END(66); PROF_CLOSE; return reinterpret_cast<jlong>(dictionary); } +static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz, + jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray, + jobjectArray attributeValueStringArray) { + const jsize localeUtf8Length = env->GetStringUTFLength(locale); + char localeChars[localeUtf8Length + 1]; + env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars); + localeChars[localeUtf8Length] = '\0'; + std::vector<int> localeCodePoints; + HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints); + const int keyCount = env->GetArrayLength(attributeKeyStringArray); + const int valueCount = env->GetArrayLength(attributeValueStringArray); + if (keyCount != valueCount) { + return false; + } + DictionaryHeaderStructurePolicy::AttributeMap attributeMap = + JniDataUtils::constructAttributeMap(env, attributeKeyStringArray, + attributeValueStringArray); + DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = + DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict( + formatVersion, localeCodePoints, &attributeMap); + if (!dictionaryStructureWithBufferPolicy) { + return 0; + } + Dictionary *const dictionary = + new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); + return reinterpret_cast<jlong>(dictionary); +} + static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict, jstring filePath) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); @@ -135,15 +128,64 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic delete dictionary; } -static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, +static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict, + jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys, + jobject outAttributeValues) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const DictionaryHeaderStructurePolicy *const headerPolicy = + dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + const int headerSize = headerPolicy->getSize(); + env->SetIntArrayRegion(outHeaderSize, 0 /* start */, 1 /* len */, &headerSize); + const int formatVersion = headerPolicy->getFormatVersionNumber(); + env->SetIntArrayRegion(outFormatVersion, 0 /* start */, 1 /* len */, &formatVersion); + // Output attribute map + jclass arrayListClass = env->FindClass("java/util/ArrayList"); + jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); + const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap = + headerPolicy->getAttributeMap(); + for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin(); + it != attributeMap->end(); ++it) { + // Output key + jintArray keyCodePointArray = env->NewIntArray(it->first.size()); + env->SetIntArrayRegion( + keyCodePointArray, 0 /* start */, it->first.size(), &it->first.at(0)); + env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray); + env->DeleteLocalRef(keyCodePointArray); + // Output value + jintArray valueCodePointArray = env->NewIntArray(it->second.size()); + env->SetIntArrayRegion( + valueCodePointArray, 0 /* start */, it->second.size(), &it->second.at(0)); + env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray); + env->DeleteLocalRef(valueCodePointArray); + } + env->DeleteLocalRef(arrayListClass); + return; +} + +static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return 0; + const DictionaryHeaderStructurePolicy *const headerPolicy = + dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + return headerPolicy->getFormatVersionNumber(); +} + +static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, - jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jintArray suggestOptions, - jintArray prevWordCodePointsForBigrams, jintArray outputCodePointsArray, - jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray, - jintArray outputAutoCommitFirstWordConfidenceArray) { + jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions, + jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount, + jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray, + jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray, + jfloatArray inOutLanguageWeight) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) return 0; + // Assign 0 to outSuggestionCount here in case of returning earlier in this method. + int count = 0; + env->SetIntArrayRegion(outSuggestionCount, 0, 1 /* len */, &count); + if (!dictionary) { + return; + } ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo); DicTraverseSession *traverseSession = reinterpret_cast<DicTraverseSession *>(dicTraverseSession); @@ -158,7 +200,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j const jsize prevWordCodePointsLength = prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0; int prevWordCodePointsInternal[prevWordCodePointsLength]; - int *prevWordCodePoints = 0; + int *prevWordCodePoints = nullptr; env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates); env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates); env->GetIntArrayRegion(timesArray, 0, inputSize, times); @@ -177,55 +219,44 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j // Output values /* By the way, let's check the output array length here to make sure */ - const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray); + const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray); if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) { AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength); ASSERT(false); - return 0; + return; } - const jsize scoresLength = env->GetArrayLength(scoresArray); + const jsize scoresLength = env->GetArrayLength(outScoresArray); if (scoresLength != MAX_RESULTS) { AKLOGE("Invalid scoresLength: %d", scoresLength); ASSERT(false); - return 0; + return; } - int outputCodePoints[outputCodePointsLength]; - int scores[scoresLength]; - const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray); - int spaceIndices[spaceIndicesLength]; - const jsize outputTypesLength = env->GetArrayLength(outputTypesArray); - int outputTypes[outputTypesLength]; const jsize outputAutoCommitFirstWordConfidenceLength = - env->GetArrayLength(outputAutoCommitFirstWordConfidenceArray); - // We only use the first result, as obviously we will only ever autocommit the first one + env->GetArrayLength(outAutoCommitFirstWordConfidenceArray); ASSERT(outputAutoCommitFirstWordConfidenceLength == 1); - int outputAutoCommitFirstWordConfidence[outputAutoCommitFirstWordConfidenceLength]; - memset(outputCodePoints, 0, sizeof(outputCodePoints)); - memset(scores, 0, sizeof(scores)); - memset(spaceIndices, 0, sizeof(spaceIndices)); - memset(outputTypes, 0, sizeof(outputTypes)); - memset(outputAutoCommitFirstWordConfidence, 0, sizeof(outputAutoCommitFirstWordConfidence)); - - int count; + if (outputAutoCommitFirstWordConfidenceLength != 1) { + // We only use the first result, as obviously we will only ever autocommit the first one + AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d", + outputAutoCommitFirstWordConfidenceLength); + ASSERT(false); + return; + } + float languageWeight; + env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight); + SuggestionResults suggestionResults(MAX_RESULTS); if (givenSuggestOptions.isGesture() || inputSize > 0) { - count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, + // TODO: Use SuggestionResults to return suggestions. + dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints, - prevWordCodePointsLength, commitPoint, &givenSuggestOptions, outputCodePoints, - scores, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); + prevWordCodePointsLength, &givenSuggestOptions, languageWeight, + &suggestionResults); } else { - count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, - outputCodePoints, scores, outputTypes); + dictionary->getPredictions(prevWordCodePoints, prevWordCodePointsLength, + &suggestionResults); } - - // Copy back the output values - env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints); - env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores); - env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices); - env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes); - env->SetIntArrayRegion(outputAutoCommitFirstWordConfidenceArray, 0, - outputAutoCommitFirstWordConfidenceLength, outputAutoCommitFirstWordConfidence); - - return count; + suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray, + outScoresArray, outSpaceIndicesArray, outTypesArray, + outAutoCommitFirstWordConfidenceArray, inOutLanguageWeight); } static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict, @@ -252,44 +283,64 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c word1Length); } -static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, - jintArray before, jintArray after, jint score) { - jsize beforeLength = env->GetArrayLength(before); - jsize afterLength = env->GetArrayLength(after); - int beforeCodePoints[beforeLength]; - int afterCodePoints[afterLength]; - env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); - env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); - return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength, - afterCodePoints, afterLength, score); +// Method to iterate all words in the dictionary for makedict. +// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when +// the dictionary does not have a next word. +static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, + jlong dict, jint token, jintArray outCodePoints) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return 0; + const jsize outCodePointsLength = env->GetArrayLength(outCodePoints); + if (outCodePointsLength != MAX_WORD_LENGTH) { + AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength); + ASSERT(false); + return 0; + } + int wordCodePoints[outCodePointsLength]; + memset(wordCodePoints, 0, sizeof(wordCodePoints)); + const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints); + env->SetIntArrayRegion(outCodePoints, 0, outCodePointsLength, wordCodePoints); + return nextToken; } -static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, jintArray before, - jintArray after) { - jsize beforeLength = env->GetArrayLength(before); - jsize afterLength = env->GetArrayLength(after); - int beforeCodePoints[beforeLength]; - int afterCodePoints[afterLength]; - env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); - env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); - return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength, - afterCodePoints, afterLength); +static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, + jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags, + jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo, + jobject outShortcutTargets, jobject outShortcutProbabilities) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const jsize wordLength = env->GetArrayLength(word); + int wordCodePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); } static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, - jintArray word, jint probability) { + jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, + jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return; } - jsize wordLength = env->GetArrayLength(word); - int codePoints[wordLength]; - env->GetIntArrayRegion(word, 0, wordLength, codePoints); - dictionary->addUnigramWord(codePoints, wordLength, probability); + jsize codePointCount = env->GetArrayLength(word); + int codePoints[codePointCount]; + env->GetIntArrayRegion(word, 0, codePointCount, codePoints); + std::vector<UnigramProperty::ShortcutProperty> shortcuts; + std::vector<int> shortcutTargetCodePoints; + JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints); + if (!shortcutTargetCodePoints.empty()) { + shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); + } + const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, + probability, timestamp, 0 /* level */, 0 /* count */, &shortcuts); + dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty); } static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jintArray word1, jint probability) { + jintArray word0, jintArray word1, jint probability, jint timestamp) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return; @@ -301,7 +352,7 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, - word1Length, probability); + word1Length, probability, timestamp); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -320,6 +371,89 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz word1Length); } +// Returns how many language model params are processed. +static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz, + jlong dict, jobjectArray languageModelParams, jint startIndex) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return 0; + } + jsize languageModelParamCount = env->GetArrayLength(languageModelParams); + if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) { + return 0; + } + jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0); + jclass languageModelParamClass = env->GetObjectClass(languageModelParam); + env->DeleteLocalRef(languageModelParam); + + jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I"); + jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I"); + jfieldID unigramProbabilityFieldId = + env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I"); + jfieldID bigramProbabilityFieldId = + env->GetFieldID(languageModelParamClass, "mBigramProbability", "I"); + jfieldID timestampFieldId = + env->GetFieldID(languageModelParamClass, "mTimestamp", "I"); + jfieldID shortcutTargetFieldId = + env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I"); + jfieldID shortcutProbabilityFieldId = + env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I"); + jfieldID isNotAWordFieldId = + env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z"); + jfieldID isBlacklistedFieldId = + env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z"); + env->DeleteLocalRef(languageModelParamClass); + + for (int i = startIndex; i < languageModelParamCount; ++i) { + jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i); + // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the + // other hand, word0 can be null and then it means the set of params doesn't contain bigram + // information. + jintArray word0 = static_cast<jintArray>( + env->GetObjectField(languageModelParam, word0FieldId)); + jsize word0Length = word0 ? env->GetArrayLength(word0) : 0; + int word0CodePoints[word0Length]; + if (word0) { + env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); + } + jintArray word1 = static_cast<jintArray>( + env->GetObjectField(languageModelParam, word1FieldId)); + jsize word1Length = env->GetArrayLength(word1); + int word1CodePoints[word1Length]; + env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); + jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId); + jint timestamp = env->GetIntField(languageModelParam, timestampFieldId); + jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId); + jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId); + jintArray shortcutTarget = static_cast<jintArray>( + env->GetObjectField(languageModelParam, shortcutTargetFieldId)); + std::vector<UnigramProperty::ShortcutProperty> shortcuts; + std::vector<int> shortcutTargetCodePoints; + JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints); + if (!shortcutTargetCodePoints.empty()) { + jint shortcutProbability = + env->GetIntField(languageModelParam, shortcutProbabilityFieldId); + shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); + } + const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, + unigramProbability, timestamp, 0 /* level */, 0 /* count */, &shortcuts); + dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty); + if (word0) { + jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); + dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length, + bigramProbability, timestamp); + } + if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { + return i + 1; + } + env->DeleteLocalRef(word0); + env->DeleteLocalRef(word1); + env->DeleteLocalRef(shortcutTarget); + env->DeleteLocalRef(languageModelParam); + } + return languageModelParamCount; +} + static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz, jlong dict, jint unigramProbability, jint bigramProbability) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); @@ -343,27 +477,45 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, static const int GET_PROPERTY_RESULT_LENGTH = 100; char resultChars[GET_PROPERTY_RESULT_LENGTH]; resultChars[0] = '\0'; - dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH); + dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH); return env->NewStringUTF(resultChars); } +static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return false; + } + return dictionary->getDictionaryStructurePolicy()->isCorrupted(); +} + static const JNINativeMethod sMethods[] = { { - const_cast<char *>("createEmptyDictFileNative"), - const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile) - }, - { const_cast<char *>("openNative"), const_cast<char *>("(Ljava/lang/String;JJZ)J"), reinterpret_cast<void *>(latinime_BinaryDictionary_open) }, { + const_cast<char *>("createOnMemoryNative"), + const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"), + reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory) + }, + { const_cast<char *>("closeNative"), const_cast<char *>("(J)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_close) }, { + const_cast<char *>("getFormatVersionNative"), + const_cast<char *>("(J)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion) + }, + { + const_cast<char *>("getHeaderInfoNative"), + const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo) + }, + { const_cast<char *>("flushNative"), const_cast<char *>("(JLjava/lang/String;)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_flush) @@ -380,7 +532,7 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("getSuggestionsNative"), - const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I[I)I"), + const_cast<char *>("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) }, { @@ -394,23 +546,24 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) }, { - const_cast<char *>("calcNormalizedScoreNative"), - const_cast<char *>("([I[II)F"), - reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore) + const_cast<char *>("getWordPropertyNative"), + const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" + "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty) }, { - const_cast<char *>("editDistanceNative"), - const_cast<char *>("([I[I)I"), - reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance) + const_cast<char *>("getNextWordNative"), + const_cast<char *>("(JI[I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord) }, { const_cast<char *>("addUnigramWordNative"), - const_cast<char *>("(J[II)V"), + const_cast<char *>("(J[II[IIZZI)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) }, { const_cast<char *>("addBigramWordsNative"), - const_cast<char *>("(J[I[II)V"), + const_cast<char *>("(J[I[III)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) }, { @@ -419,6 +572,12 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) }, { + const_cast<char *>("addMultipleDictionaryEntriesNative"), + const_cast<char *>( + "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries) + }, + { const_cast<char *>("calculateProbabilityNative"), const_cast<char *>("(JII)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative) @@ -427,6 +586,11 @@ static const JNINativeMethod sMethods[] = { const_cast<char *>("getPropertyNative"), const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"), reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty) + }, + { + const_cast<char *>("isCorruptedNative"), + const_cast<char *>("(J)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative) } }; |