aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp')
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp379
1 files changed, 234 insertions, 145 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 34764c337..7761ec4d5 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -14,39 +14,69 @@
* limitations under the License.
*/
-#include <cstring> // for memset()
-
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
-#include "defines.h" // for macros below
-
-#ifdef USE_MMAP_FOR_DICTIONARY
-#include <cerrno>
-#include <fcntl.h>
-#include <sys/mman.h>
-#else // USE_MMAP_FOR_DICTIONARY
-#include <cstdlib>
-#include <cstdio> // for fopen() etc.
-#endif // USE_MMAP_FOR_DICTIONARY
-
#include "com_android_inputmethod_latin_BinaryDictionary.h"
+#include <cstring> // for memset()
+
+#include "defines.h"
#include "jni.h"
#include "jni_common.h"
-#include "obsolete/correction.h"
-#include "suggest/core/dictionary/binary_dictionary_format.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/suggest_options.h"
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/autocorrection_threshold_utils.h"
namespace latinime {
class ProximityInfo;
-static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd);
+// TODO: Move to makedict.
+static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz,
+ jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray,
+ jobjectArray attributeValueStringArray) {
+ const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
+ char filePathChars[filePathUtf8Length + 1];
+ env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
+ filePathChars[filePathUtf8Length] = '\0';
+
+ const int keyCount = env->GetArrayLength(attributeKeyStringArray);
+ const int valueCount = env->GetArrayLength(attributeValueStringArray);
+ if (keyCount != valueCount) {
+ return false;
+ }
+
+ HeaderReadWriteUtils::AttributeMap attributeMap;
+ for (int i = 0; i < keyCount; i++) {
+ jstring keyString = static_cast<jstring>(
+ env->GetObjectArrayElement(attributeKeyStringArray, i));
+ const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
+ char keyChars[keyUtf8Length + 1];
+ env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
+ keyChars[keyUtf8Length] = '\0';
+ HeaderReadWriteUtils::AttributeMap::key_type key;
+ HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
+
+ jstring valueString = static_cast<jstring>(
+ env->GetObjectArrayElement(attributeValueStringArray, i));
+ const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
+ char valueChars[valueUtf8Length + 1];
+ env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
+ valueChars[valueUtf8Length] = '\0';
+ HeaderReadWriteUtils::AttributeMap::mapped_type value;
+ HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
+
+ attributeMap[key] = value;
+ }
+
+ return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion),
+ &attributeMap);
+}
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
- jlong dictOffset, jlong dictSize) {
+ jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
PROF_START(66);
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
@@ -57,83 +87,62 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
- int fd = 0;
- void *dictBuf = 0;
- int adjust = 0;
-#ifdef USE_MMAP_FOR_DICTIONARY
- /* mmap version */
- fd = open(sourceDirChars, O_RDONLY);
- if (fd < 0) {
- AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
- return 0;
- }
- int pagesize = getpagesize();
- adjust = static_cast<int>(dictOffset) % pagesize;
- int adjDictOffset = static_cast<int>(dictOffset) - adjust;
- int adjDictSize = static_cast<int>(dictSize) + adjust;
- dictBuf = mmap(0, adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
- if (dictBuf == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy =
+ DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
+ sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
+ isUpdatable == JNI_TRUE);
+ if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
- dictBuf = static_cast<char *>(dictBuf) + adjust;
-#else // USE_MMAP_FOR_DICTIONARY
- /* malloc version */
- FILE *file = 0;
- file = fopen(sourceDirChars, "rb");
- if (file == 0) {
- AKLOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
- return 0;
- }
- dictBuf = malloc(dictSize);
- if (!dictBuf) {
- AKLOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno);
- return 0;
- }
- int ret = fseek(file, static_cast<long>(dictOffset), SEEK_SET);
- if (ret != 0) {
- AKLOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno);
- return 0;
- }
- ret = fread(dictBuf, dictSize, 1, file);
- if (ret != 1) {
- AKLOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno);
- return 0;
- }
- ret = fclose(file);
- if (ret != 0) {
- AKLOGE("DICT: Failure in fclose. ret=%d errno=%d", ret, errno);
- return 0;
- }
-#endif // USE_MMAP_FOR_DICTIONARY
- if (!dictBuf) {
- AKLOGE("DICT: dictBuf is null");
- return 0;
- }
- Dictionary *dictionary = 0;
- if (BinaryDictionaryFormat::UNKNOWN_VERSION
- == BinaryDictionaryFormat::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
- static_cast<int>(dictSize))) {
- AKLOGE("DICT: dictionary format is unknown, bad magic number");
-#ifdef USE_MMAP_FOR_DICTIONARY
- releaseDictBuf(static_cast<const char *>(dictBuf) - adjust, adjDictSize, fd);
-#else // USE_MMAP_FOR_DICTIONARY
- releaseDictBuf(dictBuf, 0, 0);
-#endif // USE_MMAP_FOR_DICTIONARY
- } else {
- dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust);
- }
+
+ Dictionary *const dictionary = new Dictionary(env, dictionaryStructureWithBufferPolicy);
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
}
+static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
+ jstring filePath) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return;
+ const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
+ char filePathChars[filePathUtf8Length + 1];
+ env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
+ filePathChars[filePathUtf8Length] = '\0';
+ dictionary->flush(filePathChars);
+}
+
+static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
+ jlong dict) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return false;
+ return dictionary->needsToRunGC();
+}
+
+static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
+ jstring filePath) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return;
+ const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
+ char filePathChars[filePathUtf8Length + 1];
+ env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
+ filePathChars[filePathUtf8Length] = '\0';
+ dictionary->flushWithGC(filePathChars);
+}
+
+static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return;
+ delete dictionary;
+}
+
static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jintArray suggestOptions,
jintArray prevWordCodePointsForBigrams, jintArray outputCodePointsArray,
- jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray) {
+ jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray,
+ jintArray outputAutoCommitFirstWordConfidence) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
@@ -200,7 +209,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
scores, spaceIndices, outputTypes);
} else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
- inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
+ outputCodePoints, scores, outputTypes);
}
// Copy back the output values
@@ -213,26 +222,27 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
}
static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
- jintArray wordArray) {
+ jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) return 0;
- const jsize codePointLength = env->GetArrayLength(wordArray);
- int codePoints[codePointLength];
- env->GetIntArrayRegion(wordArray, 0, codePointLength, codePoints);
- return dictionary->getProbability(codePoints, codePointLength);
+ if (!dictionary) return NOT_A_PROBABILITY;
+ const jsize wordLength = env->GetArrayLength(word);
+ int codePoints[wordLength];
+ env->GetIntArrayRegion(word, 0, wordLength, codePoints);
+ return dictionary->getProbability(codePoints, wordLength);
}
-static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
- jintArray wordArray1, jintArray wordArray2) {
+static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
+ jlong dict, jintArray word0, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
- const jsize codePointLength1 = env->GetArrayLength(wordArray1);
- const jsize codePointLength2 = env->GetArrayLength(wordArray2);
- int codePoints1[codePointLength1];
- int codePoints2[codePointLength2];
- env->GetIntArrayRegion(wordArray1, 0, codePointLength1, codePoints1);
- env->GetIntArrayRegion(wordArray2, 0, codePointLength2, codePoints2);
- return dictionary->isValidBigram(codePoints1, codePointLength1, codePoints2, codePointLength2);
+ const jsize word0Length = env->GetArrayLength(word0);
+ const jsize word1Length = env->GetArrayLength(word1);
+ int word0CodePoints[word0Length];
+ int word1CodePoints[word1Length];
+ env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+ env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+ return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
+ word1Length);
}
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -243,7 +253,7 @@ static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass
int afterCodePoints[afterLength];
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
- return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
+ return AutocorrectionThresholdUtils::calcNormalizedScore(beforeCodePoints, beforeLength,
afterCodePoints, afterLength, score);
}
@@ -255,61 +265,140 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji
int afterCodePoints[afterLength];
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
- return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
+ return AutocorrectionThresholdUtils::editDistance(beforeCodePoints, beforeLength,
afterCodePoints, afterLength);
}
-static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
+static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
+ jintArray word, jint probability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) return;
- const void *dictBuf = dictionary->getBinaryDictionaryInfo()->getDictBuf();
- if (!dictBuf) return;
-#ifdef USE_MMAP_FOR_DICTIONARY
- releaseDictBuf(static_cast<const char *>(dictBuf) - dictionary->getDictBufAdjust(),
- dictionary->getDictSize() + dictionary->getDictBufAdjust(), dictionary->getMmapFd());
-#else // USE_MMAP_FOR_DICTIONARY
- releaseDictBuf(dictBuf, 0, 0);
-#endif // USE_MMAP_FOR_DICTIONARY
- delete dictionary;
+ if (!dictionary) {
+ return;
+ }
+ jsize wordLength = env->GetArrayLength(word);
+ int codePoints[wordLength];
+ env->GetIntArrayRegion(word, 0, wordLength, codePoints);
+ dictionary->addUnigramWord(codePoints, wordLength, probability);
}
-static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) {
-#ifdef USE_MMAP_FOR_DICTIONARY
- int ret = munmap(const_cast<void *>(dictBuf), length);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
+ jintArray word0, jintArray word1, jint probability) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return;
}
- ret = close(fd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ jsize word0Length = env->GetArrayLength(word0);
+ int word0CodePoints[word0Length];
+ env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+ jsize word1Length = env->GetArrayLength(word1);
+ int word1CodePoints[word1Length];
+ env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+ dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
+ word1Length, probability);
+}
+
+static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
+ jintArray word0, jintArray word1) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return;
}
-#else // USE_MMAP_FOR_DICTIONARY
- free(const_cast<void *>(dictBuf));
-#endif // USE_MMAP_FOR_DICTIONARY
+ jsize word0Length = env->GetArrayLength(word0);
+ int word0CodePoints[word0Length];
+ env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+ jsize word1Length = env->GetArrayLength(word1);
+ int word1CodePoints[word1Length];
+ env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+ dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints,
+ word1Length);
}
-static JNINativeMethod sMethods[] = {
- {const_cast<char *>("openNative"),
- const_cast<char *>("(Ljava/lang/String;JJ)J"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
- {const_cast<char *>("closeNative"),
- const_cast<char *>("(J)V"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
- {const_cast<char *>("getSuggestionsNative"),
- const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
- {const_cast<char *>("getProbabilityNative"),
- const_cast<char *>("(J[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)},
- {const_cast<char *>("isValidBigramNative"),
- const_cast<char *>("(J[I[I)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
- {const_cast<char *>("calcNormalizedScoreNative"),
- const_cast<char *>("([I[II)F"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
- {const_cast<char *>("editDistanceNative"),
- const_cast<char *>("([I[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
+static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
+ jlong dict, jint unigramProbability, jint bigramProbability) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return NOT_A_PROBABILITY;
+ }
+ return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
+ bigramProbability);
+}
+
+static const JNINativeMethod sMethods[] = {
+ {
+ const_cast<char *>("createEmptyDictFileNative"),
+ const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile)
+ },
+ {
+ const_cast<char *>("openNative"),
+ const_cast<char *>("(Ljava/lang/String;JJZ)J"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_open)
+ },
+ {
+ const_cast<char *>("closeNative"),
+ const_cast<char *>("(J)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_close)
+ },
+ {
+ const_cast<char *>("flushNative"),
+ const_cast<char *>("(JLjava/lang/String;)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
+ },
+ {
+ const_cast<char *>("needsToRunGCNative"),
+ const_cast<char *>("(J)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
+ },
+ {
+ const_cast<char *>("flushWithGCNative"),
+ const_cast<char *>("(JLjava/lang/String;)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
+ },
+ {
+ const_cast<char *>("getSuggestionsNative"),
+ const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
+ },
+ {
+ const_cast<char *>("getProbabilityNative"),
+ const_cast<char *>("(J[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
+ },
+ {
+ const_cast<char *>("getBigramProbabilityNative"),
+ const_cast<char *>("(J[I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
+ },
+ {
+ const_cast<char *>("calcNormalizedScoreNative"),
+ const_cast<char *>("([I[II)F"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
+ },
+ {
+ const_cast<char *>("editDistanceNative"),
+ const_cast<char *>("([I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)
+ },
+ {
+ const_cast<char *>("addUnigramWordNative"),
+ const_cast<char *>("(J[II)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
+ },
+ {
+ const_cast<char *>("addBigramWordsNative"),
+ const_cast<char *>("(J[I[II)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
+ },
+ {
+ const_cast<char *>("removeBigramWordsNative"),
+ const_cast<char *>("(J[I[I)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
+ },
+ {
+ const_cast<char *>("calculateProbabilityNative"),
+ const_cast<char *>("(JII)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
+ }
};
int register_BinaryDictionary(JNIEnv *env) {