about | summary | refs | log | tree | commit | diff | stats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r-- native/jni/HostUnitTests.mk | 2
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 77
-rw-r--r-- native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp | 4
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.cpp | 5
-rw-r--r-- native/jni/src/suggest/core/dictionary/dictionary.h | 3
-rw-r--r-- native/jni/src/suggest/core/dictionary/property/word_property.cpp | 3
-rw-r--r-- native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp | 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp | 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp | 14
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h | 3
-rw-r--r-- native/jni/src/utils/char_utils.h | 4
-rw-r--r-- native/jni/src/utils/jni_data_utils.h | 24
15 files changed, 105 insertions, 65 deletions
diff --git a/native/jni/HostUnitTests.mk b/native/jni/HostUnitTests.mk
index 572d36564..6967d9b87 100644
--- a/native/jni/HostUnitTests.mk
+++ b/native/jni/HostUnitTests.mk
@@ -44,7 +44,7 @@ LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
LOCAL_MODULE := liblatinime_host_unittests
LOCAL_MODULE_TAGS := tests
LOCAL_SRC_FILES := $(addprefix $(LATIN_IME_TEST_SRC_DIR)/, $(LATIN_IME_CORE_TEST_FILES))
-LOCAL_STATIC_LIBRARIES += liblatinime_host_static_for_unittests libgtest_host libgtest_main_host
+LOCAL_STATIC_LIBRARIES += liblatinime_host_static_for_unittests
include $(BUILD_HOST_NATIVE_TEST)
endif # Darwin - TODO: Remove this
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index e51f7171d..6b4fb7986 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -301,34 +301,55 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
// the dictionary does not have a next word.
static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
- jlong dict, jint token, jintArray outCodePoints) {
+ jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
- const jsize outCodePointsLength = env->GetArrayLength(outCodePoints);
- if (outCodePointsLength != MAX_WORD_LENGTH) {
- AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength);
+ const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
+ if (codePointBufSize != MAX_WORD_LENGTH) {
+ AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
ASSERT(false);
return 0;
}
- int wordCodePoints[outCodePointsLength];
- memset(wordCodePoints, 0, sizeof(wordCodePoints));
- const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints);
+ int wordCodePoints[codePointBufSize];
+ int wordCodePointCount = 0;
+ const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
+ &wordCodePointCount);
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
- MAX_WORD_LENGTH /* maxLength */, wordCodePoints, outCodePointsLength,
+ MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
false /* needsNullTermination */);
+ bool isBeginningOfSentence = false;
+ if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ isBeginningOfSentence = true;
+ }
+ JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
+ isBeginningOfSentence);
return nextToken;
}
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
- jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo,
- jobject outShortcutTargets, jobject outShortcutProbabilities) {
+ jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
+ jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
+ jobject outBigramProbabilityInfo, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
- int wordCodePoints[wordLength];
+ if (wordLength > MAX_WORD_LENGTH) {
+ AKLOGE("Invalid wordLength: %d", wordLength);
+ return;
+ }
+ int wordCodePoints[MAX_WORD_LENGTH];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
- const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+ int codePointCount = wordLength;
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(
+ wordCodePoints, wordLength, MAX_WORD_LENGTH);
+ if (codePointCount < 0) {
+ AKLOGE("Cannot attach Beginning-of-Sentence marker.");
+ return;
+ }
+ }
+ const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
@@ -553,14 +574,18 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
- // TODO: Migrate historical information.
int wordCodePoints[MAX_WORD_LENGTH];
+ int wordCodePointCount = 0;
int token = 0;
// Add unigrams.
do {
- token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
- const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
- const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+ token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
+ const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
+ wordCodePointCount);
+ if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ // Skip beginning-of-sentence unigram.
+ continue;
+ }
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
@@ -569,8 +594,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength,
- wordProperty.getUnigramProperty())) {
+ if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
+ wordCodePointCount, wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
@@ -578,9 +603,9 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
// Add bigrams.
do {
- token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
- const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
- const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
+ token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
+ const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
+ wordCodePointCount);
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
@@ -589,8 +614,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false;
}
}
- const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength,
- false /* isStartOfSentence */);
+ const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
+ wordProperty.getUnigramProperty()->representsBeginningOfSentence());
for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
&bigramProperty)) {
@@ -667,13 +692,13 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getWordPropertyNative"),
- const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
+ const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
const_cast<char *>("getNextWordNative"),
- const_cast<char *>("(JI[I)I"),
+ const_cast<char *>("(JI[I[Z)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
},
{
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 295e760d6..56339fe48 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -57,6 +57,10 @@ void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
continue;
}
+ if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
+ && bigramsIt.getProbability() == NOT_A_PROBABILITY) {
+ continue;
+ }
const int codePointCount = mDictionaryStructurePolicy->
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index bf0d0b126..e553bc0fc 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -145,10 +145,11 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints,
codePoints, codePointCount);
}
-int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
- token, outCodePoints);
+ token, outCodePoints, outCodePointCount);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index a96c87635..83447de44 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -103,7 +103,8 @@ class Dictionary {
// Method to iterate all words in the dictionary.
// The returned token has to be used to get the next word. If token is 0, this method newly
// starts iterating the dictionary.
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy.get();
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
index 6f5f808f8..5bdd5606b 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -28,7 +28,8 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
false /* needsNullTermination */);
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
- !mBigrams.empty(), mUnigramProperty.hasShortcuts()};
+ !mBigrams.empty(), mUnigramProperty.hasShortcuts(),
+ mUnigramProperty.representsBeginningOfSentence()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index e2771f97c..b72601109 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -104,7 +104,8 @@ class DictionaryStructureWithBufferPolicy {
// Method to iterate all words in the dictionary.
// The returned token has to be used to get the next word. If token is 0, this method newly
// starts iterating the dictionary.
- virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
+ virtual int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) = 0;
virtual bool isCorrupted() const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 4ac0f406e..9780ae048 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -478,10 +478,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
- // TODO: Return code point count like other methods.
- // Null termination.
- outCodePoints[0] = 0;
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
@@ -498,13 +497,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
- if (codePointCount < MAX_WORD_LENGTH) {
- // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
- // code points.
- outCodePoints[codePointCount] = 0;
- }
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 2e948ac4a..16b1bd2c1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -134,7 +134,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
bool isCorrupted() const {
return mIsCorrupted;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 7e1f3b233..5c62b9caf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -391,7 +391,9 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
+int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
if (token == 0) {
// Start iterating the dictionary.
mTerminalPtNodePositionsForIteratingWords.clear();
@@ -409,8 +411,8 @@ int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outC
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
- outCodePoints, &unigramProbability);
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos,
+ MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index dce94363a..ec8407408 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -137,7 +137,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
bool isCorrupted() const {
return mIsCorrupted;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index f7f2a32b4..46107d92a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -489,10 +489,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
-int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
- // TODO: Return code point count like other methods.
- // Null termination.
- outCodePoints[0] = 0;
+int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount) {
+ *outCodePointCount = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
@@ -509,13 +508,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
- const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
- if (codePointCount < MAX_WORD_LENGTH) {
- // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
- // code points.
- outCodePoints[codePointCount] = 0;
- }
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 0a20965f3..5d66a2cce 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -113,7 +113,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
- int getNextWordAndNextToken(const int token, int *const outCodePoints);
+ int getNextWordAndNextToken(const int token, int *const outCodePoints,
+ int *const outCodePointCount);
bool isCorrupted() const {
return mIsCorrupted;
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index f28ed5682..63786502b 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -98,6 +98,10 @@ class CharUtils {
// Beginning-of-Sentence.
static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
const int codePointCount, const int maxCodePoint) {
+ if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ // Marker has already been attached.
+ return codePointCount;
+ }
if (codePointCount >= maxCodePoint) {
// the code points cannot be marked as a Beginning-of-Sentence.
return 0;
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 67a66fdfe..3514aeeb0 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -69,18 +69,23 @@ class JniDataUtils {
static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
const int maxLength, const int *const codePoints, const int codePointCount,
const bool needsNullTermination) {
- const int outputCodePointCount = std::min(maxLength, codePointCount);
- int outputCodePonts[outputCodePointCount];
- for (int i = 0; i < outputCodePointCount; ++i) {
+ const int codePointBufSize = std::min(maxLength, codePointCount);
+ int outputCodePonts[codePointBufSize];
+ int outputCodePointCount = 0;
+ for (int i = 0; i < codePointBufSize; ++i) {
const int codePoint = codePoints[i];
+ int codePointToOutput = codePoint;
if (!CharUtils::isInUnicodeSpace(codePoint)) {
- outputCodePonts[i] = CODE_POINT_REPLACEMENT_CHARACTER;
+ if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
+ // Just skip Beginning-of-Sentence marker.
+ continue;
+ }
+ codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
} else if (codePoint >= 0x01 && codePoint <= 0x1F) {
// Control code.
- outputCodePonts[i] = CODE_POINT_REPLACEMENT_CHARACTER;
- } else {
- outputCodePonts[i] = codePoint;
+ codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
}
+ outputCodePonts[outputCodePointCount++] = codePointToOutput;
}
env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
outputCodePonts);
@@ -90,6 +95,11 @@ class JniDataUtils {
}
}
+ static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
+ const jboolean value) {
+ env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
+ }
+
static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
env->SetIntArrayRegion(array, index, 1 /* len */, &value);
}