diff options
Diffstat (limited to 'native/jni/src')
-rw-r--r-- | native/jni/src/suggest/core/dictionary/property/word_property.cpp | 3
-rw-r--r-- | native/jni/src/utils/char_utils.h | 4
-rw-r--r-- | native/jni/src/utils/jni_data_utils.h | 24
3 files changed, 23 insertions, 8 deletions
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
index 6f5f808f8..5bdd5606b 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -28,7 +28,8 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
             MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
             false /* needsNullTermination */);
     jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
-            !mBigrams.empty(), mUnigramProperty.hasShortcuts()};
+            !mBigrams.empty(), mUnigramProperty.hasShortcuts(),
+            mUnigramProperty.representsBeginningOfSentence()};
     env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
     int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
             mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index f28ed5682..63786502b 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -98,6 +98,10 @@ class CharUtils {
     // Beginning-of-Sentence.
     static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints,
             const int codePointCount, const int maxCodePoint) {
+        if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
+            // Marker has already been attached.
+            return codePointCount;
+        }
         if (codePointCount >= maxCodePoint) {
             // the code points cannot be marked as a Beginning-of-Sentence.
             return 0;
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 67a66fdfe..3514aeeb0 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -69,18 +69,23 @@ class JniDataUtils {
     static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
             const int maxLength, const int *const codePoints, const int codePointCount,
             const bool needsNullTermination) {
-        const int outputCodePointCount = std::min(maxLength, codePointCount);
-        int outputCodePonts[outputCodePointCount];
-        for (int i = 0; i < outputCodePointCount; ++i) {
+        const int codePointBufSize = std::min(maxLength, codePointCount);
+        int outputCodePonts[codePointBufSize];
+        int outputCodePointCount = 0;
+        for (int i = 0; i < codePointBufSize; ++i) {
             const int codePoint = codePoints[i];
+            int codePointToOutput = codePoint;
             if (!CharUtils::isInUnicodeSpace(codePoint)) {
-                outputCodePonts[i] = CODE_POINT_REPLACEMENT_CHARACTER;
+                if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
+                    // Just skip Beginning-of-Sentence marker.
+                    continue;
+                }
+                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
             } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
                 // Control code.
-                outputCodePonts[i] = CODE_POINT_REPLACEMENT_CHARACTER;
-            } else {
-                outputCodePonts[i] = codePoint;
+                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
             }
+            outputCodePonts[outputCodePointCount++] = codePointToOutput;
         }
         env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
                 outputCodePonts);
@@ -90,6 +95,11 @@ class JniDataUtils {
         }
     }
 
+    static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
+            const jboolean value) {
+        env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
+    }
+
     static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
         env->SetIntArrayRegion(array, index, 1 /* len */, &value);
     }