about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 36
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp | 5
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h | 3
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp | 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp | 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp | 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h | 3
10 files changed, 45 insertions, 47 deletions
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 6e2219d87..c2cd2addd 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -304,17 +304,18 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
jlong dict, jint token, jintArray outCodePoints) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
- const jsize outCodePointsLength = env->GetArrayLength(outCodePoints);
- if (outCodePointsLength != MAX_WORD_LENGTH) {
- AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength);
+ const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
+ if (codePointBufSize != MAX_WORD_LENGTH) {
+ AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
ASSERT(false);
return 0;
}
- int wordCodePoints[outCodePointsLength];
- memset(wordCodePoints, 0, sizeof(wordCodePoints));
- const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints);
+ int wordCodePoints[codePointBufSize];
+ int wordCodePointCount = 0;
+ const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
+ &wordCodePointCount);
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
- MAX_WORD_LENGTH /* maxLength */, wordCodePoints, outCodePointsLength,
+ MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
false /* needsNullTermination */);
return nextToken;
}
@@ -555,12 +556,13 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// TODO: Migrate historical information.
int wordCodePoints[MAX_WORD_LENGTH];
+ int wordCodePointCount = 0;
int token = 0;
// Add unigrams.
do {
- token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
- const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
- const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+ token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
+ const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
+ wordCodePointCount);
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
@@ -569,8 +571,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength,
- wordProperty.getUnigramProperty())) {
+ if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
+ wordCodePointCount, wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
@@ -578,9 +580,9 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// Add bigrams.
do {
- token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
- const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
- const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+ token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
+ const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
+ wordCodePointCount);
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
@@ -589,8 +591,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength,
- false /* isStartOfSentence */);
+ const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
+ false /* isBeginningOfSentence */);
for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
&bigramProperty)) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 0bcde2294..228260216 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -145,10 +145,11 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints,
codePoints, codePointCount);
}
-int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
- token, outCodePoints);
+ token, outCodePoints, outCodePointCount);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 542ba7291..247ee2421 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -103,7 +103,8 @@ class Dictionary {
// Method to iterate all words in the dictionary.
// The returned token has to be used to get the next word. If token is 0, this method newly
// starts iterating the dictionary.
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index e2771f97c..b72601109 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -104,7 +104,8 @@ class DictionaryStructureWithBufferPolicy {
// Method to iterate all words in the dictionary.
// The returned token has to be used to get the next word. If token is 0, this method newly
// starts iterating the dictionary.
- virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
+ virtual int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) = 0;
virtual bool isCorrupted() const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 4ac0f406e..9780ae048 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -478,10 +478,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
- // TODO: Return code point count like other methods.
- // Null termination.
- outCodePoints[0] = 0;
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
@@ -498,13 +497,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
- if (codePointCount < MAX_WORD_LENGTH) {
- // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
- // code points.
- outCodePoints[codePointCount] = 0;
- }
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 2e948ac4a..16b1bd2c1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -134,7 +134,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
bool isCorrupted() const {
return mIsCorrupted;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 7e1f3b233..5c62b9caf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -391,7 +391,9 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
if (token == 0) {
// Start iterating the dictionary.
mTerminalPtNodePositionsForIteratingWords.clear();
@@ -409,8 +411,8 @@ int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outC
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
- outCodePoints, &unigramProbability);
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos,
+ MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index dce94363a..ec8407408 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -137,7 +137,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
bool isCorrupted() const {
return mIsCorrupted;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index f7f2a32b4..46107d92a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -489,10 +489,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
- // TODO: Return code point count like other methods.
- // Null termination.
- outCodePoints[0] = 0;
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
@@ -509,13 +508,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
- if (codePointCount < MAX_WORD_LENGTH) {
- // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
- // code points.
- outCodePoints[codePointCount] = 0;
- }
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 0a20965f3..5d66a2cce 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -113,7 +113,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
bool isCorrupted() const {
return mIsCorrupted;