diff options
Diffstat (limited to 'native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp')
-rw-r--r-- | native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 262 |
1 files changed, 245 insertions, 17 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 8f21c50ec..bb54cbdb9 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -24,10 +24,13 @@ #include "jni.h" #include "jni_common.h" #include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/dictionary/word_property.h" #include "suggest/core/suggest_options.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" +#include "utils/char_utils.h" +#include "utils/time_keeper.h" namespace latinime { @@ -35,20 +38,22 @@ class ProximityInfo; // TODO: Move to makedict. static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz, - jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray, + jstring filePath, jlong dictVersion, jstring locale, jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) { const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); char filePathChars[filePathUtf8Length + 1]; env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); filePathChars[filePathUtf8Length] = '\0'; - + jsize localeLength = env->GetStringLength(locale); + jchar localeCodePoints[localeLength]; + env->GetStringRegion(locale, 0, localeLength, localeCodePoints); const int keyCount = env->GetArrayLength(attributeKeyStringArray); const int valueCount = env->GetArrayLength(attributeValueStringArray); if (keyCount != valueCount) { return false; } - HeaderReadWriteUtils::AttributeMap attributeMap; + DictionaryHeaderStructurePolicy::AttributeMap attributeMap; for (int i = 0; i < keyCount; i++) { jstring keyString = static_cast<jstring>( env->GetObjectArrayElement(attributeKeyStringArray, i)); @@ -56,7 +61,7 @@ static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclas char keyChars[keyUtf8Length + 1]; env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); keyChars[keyUtf8Length] = '\0'; - HeaderReadWriteUtils::AttributeMap::key_type key; + DictionaryHeaderStructurePolicy::AttributeMap::key_type key; HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); jstring valueString = static_cast<jstring>( @@ -65,13 +70,13 @@ static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclas char valueChars[valueUtf8Length + 1]; env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); valueChars[valueUtf8Length] = '\0'; - HeaderReadWriteUtils::AttributeMap::mapped_type value; + DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); attributeMap[key] = value; } return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion), - &attributeMap); + CharUtils::convertShortArrayToIntVector(localeCodePoints, localeLength), &attributeMap); } static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, @@ -86,11 +91,11 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy = + DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), isUpdatable == JNI_TRUE); - if (!dictionaryStructureWithBufferPolicy) { + if (!dictionaryStructureWithBufferPolicy.get()) { return 0; } @@ -135,6 +140,49 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic delete dictionary; } +static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict, + jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys, + jobject outAttributeValues) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const DictionaryHeaderStructurePolicy *const headerPolicy = + dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + const int headerSize = headerPolicy->getSize(); + env->SetIntArrayRegion(outHeaderSize, 0 /* start */, 1 /* len */, &headerSize); + const int formatVersion = headerPolicy->getFormatVersionNumber(); + env->SetIntArrayRegion(outFormatVersion, 0 /* start */, 1 /* len */, &formatVersion); + // Output attribute map + jclass arrayListClass = env->FindClass("java/util/ArrayList"); + jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); + const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap = + headerPolicy->getAttributeMap(); + for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin(); + it != attributeMap->end(); ++it) { + // Output key + jintArray keyCodePointArray = env->NewIntArray(it->first.size()); + env->SetIntArrayRegion( + keyCodePointArray, 0 /* start */, it->first.size(), &it->first.at(0)); + env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray); + env->DeleteLocalRef(keyCodePointArray); + // Output value + jintArray valueCodePointArray = env->NewIntArray(it->second.size()); + env->SetIntArrayRegion( + valueCodePointArray, 0 /* start */, it->second.size(), &it->second.at(0)); + env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray); + env->DeleteLocalRef(valueCodePointArray); + } + env->DeleteLocalRef(arrayListClass); + return; +} + +static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return 0; + const DictionaryHeaderStructurePolicy *const headerPolicy = + dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); + return headerPolicy->getFormatVersionNumber(); +} + static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, @@ -252,6 +300,41 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c word1Length); } +// Method to iterate all words in the dictionary for makedict. +// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when +// the dictionary does not have a next word. +static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, + jlong dict, jint token, jintArray outCodePoints) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return 0; + const jsize outCodePointsLength = env->GetArrayLength(outCodePoints); + if (outCodePointsLength != MAX_WORD_LENGTH) { + AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength); + ASSERT(false); + return 0; + } + int wordCodePoints[outCodePointsLength]; + memset(wordCodePoints, 0, sizeof(wordCodePoints)); + const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints); + env->SetIntArrayRegion(outCodePoints, 0, outCodePointsLength, wordCodePoints); + return nextToken; +} + +static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, + jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags, + jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo, + jobject outShortcutTargets, jobject outShortcutProbabilities) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const jsize wordLength = env->GetArrayLength(word); + int wordCodePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, + outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, + outShortcutProbabilities); +} + static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, jintArray before, jintArray after, jint score) { jsize beforeLength = env->GetArrayLength(before); @@ -277,7 +360,8 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji } static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, - jintArray word, jint probability) { + jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability, + jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return; @@ -285,11 +369,17 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jsize wordLength = env->GetArrayLength(word); int codePoints[wordLength]; env->GetIntArrayRegion(word, 0, wordLength, codePoints); - dictionary->addUnigramWord(codePoints, wordLength, probability); + jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0; + int shortcutTargetCodePoints[shortcutLength]; + if (shortcutTarget) { + env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); + } + dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints, + shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp); } static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jintArray word1, jint probability) { + jintArray word0, jintArray word1, jint probability, jint timestamp) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return; @@ -301,7 +391,7 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, - word1Length, probability); + word1Length, probability, timestamp); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -320,6 +410,87 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz word1Length); } +// Returns how many language model params are processed. +static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz, + jlong dict, jobjectArray languageModelParams, jint startIndex) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return 0; + } + jsize languageModelParamCount = env->GetArrayLength(languageModelParams); + if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) { + return 0; + } + jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0); + jclass languageModelParamClass = env->GetObjectClass(languageModelParam); + env->DeleteLocalRef(languageModelParam); + + jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I"); + jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I"); + jfieldID unigramProbabilityFieldId = + env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I"); + jfieldID bigramProbabilityFieldId = + env->GetFieldID(languageModelParamClass, "mBigramProbability", "I"); + jfieldID timestampFieldId = + env->GetFieldID(languageModelParamClass, "mTimestamp", "I"); + jfieldID shortcutTargetFieldId = + env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I"); + jfieldID shortcutProbabilityFieldId = + env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I"); + jfieldID isNotAWordFieldId = + env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z"); + jfieldID isBlacklistedFieldId = + env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z"); + env->DeleteLocalRef(languageModelParamClass); + + for (int i = startIndex; i < languageModelParamCount; ++i) { + jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i); + // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the + // other hand, word0 can be null and then it means the set of params doesn't contain bigram + // information. + jintArray word0 = static_cast<jintArray>( + env->GetObjectField(languageModelParam, word0FieldId)); + jsize word0Length = word0 ? env->GetArrayLength(word0) : 0; + int word0CodePoints[word0Length]; + if (word0) { + env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); + } + jintArray word1 = static_cast<jintArray>( + env->GetObjectField(languageModelParam, word1FieldId)); + jsize word1Length = env->GetArrayLength(word1); + int word1CodePoints[word1Length]; + env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); + jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId); + jint timestamp = env->GetIntField(languageModelParam, timestampFieldId); + jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId); + jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId); + jintArray shortcutTarget = static_cast<jintArray>( + env->GetObjectField(languageModelParam, shortcutTargetFieldId)); + jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0; + int shortcutTargetCodePoints[shortcutLength]; + if (shortcutTarget) { + env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); + } + jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId); + dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, + shortcutTargetCodePoints, shortcutLength, shortcutProbability, + isNotAWord, isBlacklisted, timestamp); + if (word0) { + jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); + dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length, + bigramProbability, timestamp); + } + if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { + return i + 1; + } + env->DeleteLocalRef(word0); + env->DeleteLocalRef(word1); + env->DeleteLocalRef(shortcutTarget); + env->DeleteLocalRef(languageModelParam); + } + return languageModelParamCount; +} + static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz, jlong dict, jint unigramProbability, jint bigramProbability) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); @@ -343,14 +514,34 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, static const int GET_PROPERTY_RESULT_LENGTH = 100; char resultChars[GET_PROPERTY_RESULT_LENGTH]; resultChars[0] = '\0'; - dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH); + dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH); return env->NewStringUTF(resultChars); } +static int latinime_BinaryDictionary_setCurrentTimeForTest(JNIEnv *env, jclass clazz, + jint currentTime) { + if (currentTime >= 0) { + TimeKeeper::startTestModeWithForceCurrentTime(currentTime); + } else { + TimeKeeper::stopTestMode(); + } + TimeKeeper::setCurrentTime(); + return TimeKeeper::peekCurrentTime(); +} + +static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return false; + } + return dictionary->getDictionaryStructurePolicy()->isCorrupted(); +} + static const JNINativeMethod sMethods[] = { { const_cast<char *>("createEmptyDictFileNative"), - const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"), + const_cast<char *>( + "(Ljava/lang/String;JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)Z"), reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile) }, { @@ -364,6 +555,16 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_close) }, { + const_cast<char *>("getFormatVersionNative"), + const_cast<char *>("(J)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion) + }, + { + const_cast<char *>("getHeaderInfoNative"), + const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo) + }, + { const_cast<char *>("flushNative"), const_cast<char *>("(JLjava/lang/String;)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_flush) @@ -394,6 +595,17 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) }, { + const_cast<char *>("getWordPropertyNative"), + const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" + "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty) + }, + { + const_cast<char *>("getNextWordNative"), + const_cast<char *>("(JI[I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord) + }, + { const_cast<char *>("calcNormalizedScoreNative"), const_cast<char *>("([I[II)F"), reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore) @@ -405,12 +617,12 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("addUnigramWordNative"), - const_cast<char *>("(J[II)V"), + const_cast<char *>("(J[II[IIZZI)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) }, { const_cast<char *>("addBigramWordsNative"), - const_cast<char *>("(J[I[II)V"), + const_cast<char *>("(J[I[III)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) }, { @@ -419,14 +631,30 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) }, { + const_cast<char *>("addMultipleDictionaryEntriesNative"), + const_cast<char *>( + "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries) + }, + { const_cast<char *>("calculateProbabilityNative"), const_cast<char *>("(JII)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative) }, { + const_cast<char *>("setCurrentTimeForTestNative"), + const_cast<char *>("(I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_setCurrentTimeForTest) + }, + { const_cast<char *>("getPropertyNative"), const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"), reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty) + }, + { + const_cast<char *>("isCorruptedNative"), + const_cast<char *>("(J)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative) } }; |