diff options
Diffstat (limited to 'native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp')
-rw-r--r-- | native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 275 |
1 files changed, 200 insertions, 75 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index f60793733..7761ec4d5 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -18,28 +18,65 @@ #include "com_android_inputmethod_latin_BinaryDictionary.h" -#include <cerrno> #include <cstring> // for memset() -#include <fcntl.h> -#include <sys/mman.h> #include "defines.h" #include "jni.h" #include "jni_common.h" -#include "obsolete/correction.h" -#include "suggest/core/dictionary/binary_dictionary_format.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "utils/autocorrection_threshold_utils.h" namespace latinime { class ProximityInfo; -static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd); +// TODO: Move to makedict. +static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz, + jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray, + jobjectArray attributeValueStringArray) { + const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); + char filePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); + filePathChars[filePathUtf8Length] = '\0'; + + const int keyCount = env->GetArrayLength(attributeKeyStringArray); + const int valueCount = env->GetArrayLength(attributeValueStringArray); + if (keyCount != valueCount) { + return false; + } + + HeaderReadWriteUtils::AttributeMap attributeMap; + for (int i = 0; i < keyCount; i++) { + jstring keyString = static_cast<jstring>( + env->GetObjectArrayElement(attributeKeyStringArray, i)); + const jsize keyUtf8Length = env->GetStringUTFLength(keyString); + char keyChars[keyUtf8Length + 1]; + env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); + keyChars[keyUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::key_type key; + HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); + + jstring valueString = static_cast<jstring>( + env->GetObjectArrayElement(attributeValueStringArray, i)); + const jsize valueUtf8Length = env->GetStringUTFLength(valueString); + char valueChars[valueUtf8Length + 1]; + env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); + valueChars[valueUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::mapped_type value; + HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); + + attributeMap[key] = value; + } + + return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion), + &attributeMap); +} static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, - jlong dictOffset, jlong dictSize) { + jlong dictOffset, jlong dictSize, jboolean isUpdatable) { PROF_OPEN; PROF_START(66); const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); @@ -50,48 +87,62 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - int fd = 0; - void *dictBuf = 0; - int adjust = 0; - fd = open(sourceDirChars, O_RDONLY); - if (fd < 0) { - AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno); + DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy = + DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( + sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), + isUpdatable == JNI_TRUE); + if (!dictionaryStructureWithBufferPolicy) { return 0; } - int pagesize = getpagesize(); - adjust = static_cast<int>(dictOffset) % pagesize; - int adjDictOffset = static_cast<int>(dictOffset) - adjust; - int adjDictSize = static_cast<int>(dictSize) + adjust; - dictBuf = mmap(0, adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); - if (dictBuf == MAP_FAILED) { - AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); - return 0; - } - dictBuf = static_cast<char *>(dictBuf) + adjust; - if (!dictBuf) { - AKLOGE("DICT: dictBuf is null"); - return 0; - } - Dictionary *dictionary = 0; - if (BinaryDictionaryFormat::UNKNOWN_VERSION - == BinaryDictionaryFormat::detectFormatVersion(static_cast<uint8_t *>(dictBuf), - static_cast<int>(dictSize))) { - AKLOGE("DICT: dictionary format is unknown, bad magic number"); - releaseDictBuf(static_cast<const char *>(dictBuf) - adjust, adjDictSize, fd); - } else { - dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust); - } + + Dictionary *const dictionary = new Dictionary(env, dictionaryStructureWithBufferPolicy); PROF_END(66); PROF_CLOSE; return reinterpret_cast<jlong>(dictionary); } +static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict, + jstring filePath) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); + char filePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); + filePathChars[filePathUtf8Length] = '\0'; + dictionary->flush(filePathChars); +} + +static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz, + jlong dict) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return false; + return dictionary->needsToRunGC(); +} + +static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict, + jstring filePath) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); + char filePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); + filePathChars[filePathUtf8Length] = '\0'; + dictionary->flushWithGC(filePathChars); +} + +static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + delete dictionary; +} + static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jintArray suggestOptions, jintArray prevWordCodePointsForBigrams, jintArray outputCodePointsArray, - jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray) { + jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray, + jintArray outputAutoCommitFirstWordConfidence) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return 0; ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo); @@ -158,7 +209,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j scores, spaceIndices, outputTypes); } else { count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, - inputCodePoints, inputSize, outputCodePoints, scores, outputTypes); + outputCodePoints, scores, outputTypes); } // Copy back the output values @@ -171,26 +222,27 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j } static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict, - jintArray wordArray) { + jintArray word) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) return 0; - const jsize codePointLength = env->GetArrayLength(wordArray); - int codePoints[codePointLength]; - env->GetIntArrayRegion(wordArray, 0, codePointLength, codePoints); - return dictionary->getProbability(codePoints, codePointLength); + if (!dictionary) return NOT_A_PROBABILITY; + const jsize wordLength = env->GetArrayLength(word); + int codePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, codePoints); + return dictionary->getProbability(codePoints, wordLength); } -static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict, - jintArray wordArray1, jintArray wordArray2) { +static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz, + jlong dict, jintArray word0, jintArray word1) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return JNI_FALSE; - const jsize codePointLength1 = env->GetArrayLength(wordArray1); - const jsize codePointLength2 = env->GetArrayLength(wordArray2); - int codePoints1[codePointLength1]; - int codePoints2[codePointLength2]; - env->GetIntArrayRegion(wordArray1, 0, codePointLength1, codePoints1); - env->GetIntArrayRegion(wordArray2, 0, codePointLength2, codePoints2); - return dictionary->isValidBigram(codePoints1, codePointLength1, codePoints2, codePointLength2); + const jsize word0Length = env->GetArrayLength(word0); + const jsize word1Length = env->GetArrayLength(word1); + int word0CodePoints[word0Length]; + int word1CodePoints[word1Length]; + env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); + env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); + return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints, + word1Length); } static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, @@ -201,7 +253,7 @@ static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass int afterCodePoints[afterLength]; env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); - return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength, + return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength, afterCodePoints, afterLength, score); } @@ -213,35 +265,73 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji int afterCodePoints[afterLength]; env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); - return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength, + return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength, afterCodePoints, afterLength); } -static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) { +static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, + jintArray word, jint probability) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) return; - const void *dictBuf = dictionary->getBinaryDictionaryInfo()->getDictBuf(); - if (!dictBuf) return; - releaseDictBuf(static_cast<const char *>(dictBuf) - dictionary->getDictBufAdjust(), - dictionary->getDictSize() + dictionary->getDictBufAdjust(), dictionary->getMmapFd()); - delete dictionary; + if (!dictionary) { + return; + } + jsize wordLength = env->GetArrayLength(word); + int codePoints[wordLength]; + env->GetIntArrayRegion(word, 0, wordLength, codePoints); + dictionary->addUnigramWord(codePoints, wordLength, probability); } -static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) { - int ret = munmap(const_cast<void *>(dictBuf), length); - if (ret != 0) { - AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); +static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, + jintArray word0, jintArray word1, jint probability) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return; } - ret = close(fd); - if (ret != 0) { - AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno); + jsize word0Length = env->GetArrayLength(word0); + int word0CodePoints[word0Length]; + env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); + jsize word1Length = env->GetArrayLength(word1); + int word1CodePoints[word1Length]; + env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); + dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, + word1Length, probability); +} + +static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, + jintArray word0, jintArray word1) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return; } + jsize word0Length = env->GetArrayLength(word0); + int word0CodePoints[word0Length]; + env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); + jsize word1Length = env->GetArrayLength(word1); + int word1CodePoints[word1Length]; + env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); + dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints, + word1Length); +} + +static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz, + jlong dict, jint unigramProbability, jint bigramProbability) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return NOT_A_PROBABILITY; + } + return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability, + bigramProbability); } static const JNINativeMethod sMethods[] = { { + const_cast<char *>("createEmptyDictFileNative"), + const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile) + }, + { const_cast<char *>("openNative"), - const_cast<char *>("(Ljava/lang/String;JJ)J"), + const_cast<char *>("(Ljava/lang/String;JJZ)J"), reinterpret_cast<void *>(latinime_BinaryDictionary_open) }, { @@ -250,8 +340,23 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_close) }, { + const_cast<char *>("flushNative"), + const_cast<char *>("(JLjava/lang/String;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_flush) + }, + { + const_cast<char *>("needsToRunGCNative"), + const_cast<char *>("(J)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC) + }, + { + const_cast<char *>("flushWithGCNative"), + const_cast<char *>("(JLjava/lang/String;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC) + }, + { const_cast<char *>("getSuggestionsNative"), - const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"), + const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I[I)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) }, { @@ -260,9 +365,9 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability) }, { - const_cast<char *>("isValidBigramNative"), - const_cast<char *>("(J[I[I)Z"), - reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram) + const_cast<char *>("getBigramProbabilityNative"), + const_cast<char *>("(J[I[I)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) }, { const_cast<char *>("calcNormalizedScoreNative"), @@ -273,6 +378,26 @@ static const JNINativeMethod sMethods[] = { const_cast<char *>("editDistanceNative"), const_cast<char *>("([I[I)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance) + }, + { + const_cast<char *>("addUnigramWordNative"), + const_cast<char *>("(J[II)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) + }, + { + const_cast<char *>("addBigramWordsNative"), + const_cast<char *>("(J[I[II)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) + }, + { + const_cast<char *>("removeBigramWordsNative"), + const_cast<char *>("(J[I[I)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) + }, + { + const_cast<char *>("calculateProbabilityNative"), + const_cast<char *>("(JII)I"), + reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative) } }; |