aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp')
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp43
1 files changed, 33 insertions, 10 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index c2cd2addd..2654a4a0a 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -301,7 +301,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
// the dictionary does not have a next word.
static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
- jlong dict, jint token, jintArray outCodePoints) {
+ jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
@@ -317,19 +317,39 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
false /* needsNullTermination */);
+ bool isBeginningOfSentence = false;
+ if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ isBeginningOfSentence = true;
+ }
+ JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
+ isBeginningOfSentence);
return nextToken;
}
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
- jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo,
- jobject outShortcutTargets, jobject outShortcutProbabilities) {
+ jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
+ jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+ jobject outBigramProbabilityInfo, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
- int wordCodePoints[wordLength];
+ if (wordLength > MAX_WORD_LENGTH) {
+ AKLOGE("Invalid wordLength: %d", wordLength);
+ return;
+ }
+ int wordCodePoints[MAX_WORD_LENGTH];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
- const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+ int codePointCount = wordLength;
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(
+ wordCodePoints, wordLength, MAX_WORD_LENGTH);
+ if (codePointCount < 0) {
+ AKLOGE("Cannot attach Beginning-of-Sentence marker.");
+ return;
+ }
+ }
+ const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
@@ -554,7 +574,6 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
- // TODO: Migrate historical information.
int wordCodePoints[MAX_WORD_LENGTH];
int wordCodePointCount = 0;
int token = 0;
@@ -563,6 +582,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
wordCodePointCount);
+ if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ // Skip beginning-of-sentence unigram.
+ continue;
+ }
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
@@ -592,7 +615,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
}
}
const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
- false /* isBeginningOfSentence */);
+ wordProperty.getUnigramProperty()->representsBeginningOfSentence());
for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
&bigramProperty)) {
@@ -669,13 +692,13 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getWordPropertyNative"),
- const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
+ const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
const_cast<char *>("getNextWordNative"),
- const_cast<char *>("(JI[I)I"),
+ const_cast<char *>("(JI[I[Z)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
},
{