diff options
Diffstat (limited to 'native/jni/src')
10 files changed, 71 insertions, 20 deletions
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp index 95608dcf8..6f5f808f8 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp @@ -16,14 +16,17 @@ #include "suggest/core/dictionary/property/word_property.h" +#include "utils/jni_data_utils.h" + namespace latinime { void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const { - env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]); - + JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, + MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), + false /* needsNullTermination */); jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), !mBigrams.empty(), mUnigramProperty.hasShortcuts()}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); @@ -41,8 +44,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, for (const auto &bigramProperty : mBigrams) { const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); - env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */, - word1CodePoints->size(), word1CodePoints->data()); + JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, + word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), + false /* needsNullTermination */); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); @@ -62,6 +66,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */, targetCodePoints->size(), targetCodePoints->data()); + JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */, + targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(), + false /* needsNullTermination */); env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); env->DeleteLocalRef(shortcutTargetCodePointArray); jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, diff --git a/native/jni/src/suggest/core/result/suggestion_results.cpp b/native/jni/src/suggest/core/result/suggestion_results.cpp index 088a55f6f..6594a1292 100644 --- a/native/jni/src/suggest/core/result/suggestion_results.cpp +++ b/native/jni/src/suggest/core/result/suggestion_results.cpp @@ -16,6 +16,8 @@ #include "suggest/core/result/suggestion_results.h" +#include "utils/jni_data_utils.h" + namespace latinime { void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCount, @@ -27,13 +29,9 @@ void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCo const SuggestedWord &suggestedWord = mSuggestedWords.top(); suggestedWord.getCodePointCount(); const int start = outputIndex * MAX_WORD_LENGTH; - env->SetIntArrayRegion(outputCodePointsArray, start, suggestedWord.getCodePointCount(), - suggestedWord.getCodePoint()); - if (suggestedWord.getCodePointCount() < MAX_WORD_LENGTH) { - const int terminal = 0; - env->SetIntArrayRegion(outputCodePointsArray, start + suggestedWord.getCodePointCount(), - 1 /* len */, &terminal); - } + JniDataUtils::outputCodePoints(env, outputCodePointsArray, start, + MAX_WORD_LENGTH /* maxLength */, suggestedWord.getCodePoint(), + suggestedWord.getCodePointCount(), true /* needsNullTermination */); const int score = suggestedWord.getScore(); env->SetIntArrayRegion(outScoresArray, outputIndex, 1 /* len */, &score); const int indexToPartialCommit = suggestedWord.getIndexToPartialCommit(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 91192fc57..bef401f87 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "utils/char_utils.h" namespace latinime { @@ -158,6 +159,10 @@ class PtNodeParams { return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); } + AK_FORCE_INLINE bool representsNonWordInfo() const { + return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]); + } + // Parent node position AK_FORCE_INLINE int getParentPos() const { return mParentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index b3af1f47a..30dcfba37 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" +#include "utils/char_utils.h" namespace latinime { @@ -318,12 +319,15 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); - childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, - PatriciaTrieReadingUtils::isTerminal(flags), - PatriciaTrieReadingUtils::hasChildrenInFlags(flags), - PatriciaTrieReadingUtils::isBlacklisted(flags) - || PatriciaTrieReadingUtils::isNotAWord(flags), - mergedNodeCodePointCount, mergedNodeCodePoints); + // Skip PtNodes don't start with Unicode code point because they represent non-word information. + if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { + childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, + PatriciaTrieReadingUtils::isTerminal(flags), + PatriciaTrieReadingUtils::hasChildrenInFlags(flags), + PatriciaTrieReadingUtils::isBlacklisted(flags) + || PatriciaTrieReadingUtils::isNotAWord(flags), + mergedNodeCodePointCount, mergedNodeCodePoints); + } return siblingPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index 77ed38b89..5aa6b9a92 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -56,6 +56,7 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, return false; } } + umask(S_IWGRP | S_IWOTH); if (mkdir(tmpDirPath, S_IRWXU) == -1) { AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 2fb3decee..8373dc549 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -59,13 +59,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // valid terminal DicNode. isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } + readingHelper.readNextSiblingNode(ptNodeParams); + if (!ptNodeParams.representsNonWordInfo()) { + // Skip PtNodes that represent non-word information. + continue; + } childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); - readingHelper.readNextSiblingNode(ptNodeParams); } if (readingHelper.isError()) { mIsCorrupted = true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 7bc7b0a48..80970c7f8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -17,6 +17,10 @@ #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include <cstdio> +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" @@ -100,9 +104,15 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath, const BufferWithExtendableBuffer *const buffer) { - FILE *const file = fopen(filePath, "wb"); + const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + if (fd == -1) { + AKLOGE("File %s cannot be opened. errno: %d", filePath, errno); + ASSERT(false); + return false; + } + FILE *const file = fdopen(fd, "wb"); if (!file) { - AKLOGE("File %s cannot be opened.", filePath); + AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno); ASSERT(false); return false; } diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp index adc474b4c..b17e0847d 100644 --- a/native/jni/src/utils/char_utils.cpp +++ b/native/jni/src/utils/char_utils.cpp @@ -22,6 +22,9 @@ namespace latinime { +const int CharUtils::MIN_UNICODE_CODE_POINT = 0; +const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; + struct LatinCapitalSmallPair { unsigned short capital; unsigned short small; diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 239419d5b..634c45b04 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -86,12 +86,19 @@ class CharUtils { return spaceCount; } + static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) { + return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; + } + static unsigned short latin_tolower(const unsigned short c); static const std::vector<int> EMPTY_STRING; private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); + static const int MIN_UNICODE_CODE_POINT; + static const int MAX_UNICODE_CODE_POINT; + /** * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h index 2ce02dc05..0e393e315 100644 --- a/native/jni/src/utils/jni_data_utils.h +++ b/native/jni/src/utils/jni_data_utils.h @@ -65,6 +65,18 @@ class JniDataUtils { return attributeMap; } + static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, + const int maxLength, const int *const codePoints, const int codePointCount, + const bool needsNullTermination) { + const int outputCodePointCount = std::min(maxLength, codePointCount); + env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, codePoints); + if (needsNullTermination && outputCodePointCount < maxLength) { + const int terminal = 0; + env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, + 1 /* len */, &terminal); + } + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); }; |