diff options
Diffstat (limited to 'native/jni/src')
33 files changed, 138 insertions, 86 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index e288413a3..fdc893653 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -88,11 +88,10 @@ void Dictionary::addUnigramWord(const int *const word, const int length, mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty); } -void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp) { +void Dictionary::addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1, - probability, timestamp); + mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty); } void Dictionary::removeBigramWords(const int *const word0, const int length0, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index b6149b338..f0a7e5b6a 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -76,8 +76,8 @@ class Dictionary { void addUnigramWord(const int *const codePoints, const int codePointCount, const UnigramProperty *const unigramProperty); - void addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp); + void addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty); void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp index 95608dcf8..6f5f808f8 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp @@ -16,14 +16,17 @@ #include "suggest/core/dictionary/property/word_property.h" +#include "utils/jni_data_utils.h" + namespace latinime { void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const { - env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]); - + JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, + MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), + false /* needsNullTermination */); jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), !mBigrams.empty(), mUnigramProperty.hasShortcuts()}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); @@ -41,8 +44,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, for (const auto &bigramProperty : mBigrams) { const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); - env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */, - word1CodePoints->size(), word1CodePoints->data()); + JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, + word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), + false /* needsNullTermination */); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); @@ -62,6 +66,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */, targetCodePoints->size(), targetCodePoints->data()); + JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */, + targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(), + false /* needsNullTermination */); env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); env->DeleteLocalRef(shortcutTargetCodePointArray); jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp index c40a2bdca..4c75a188e 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info.cpp @@ -226,7 +226,7 @@ int ProximityInfo::getKeyCenterXOfKeyIdG( // When the referencePointY is NOT_A_COORDINATE, this method calculates the return value without // using the line segment. int ProximityInfo::getKeyCenterYOfKeyIdG( - const int keyId, const int referencePointY, const bool isGeometric) const { + const int keyId, const int referencePointY, const bool isGeometric) const { // TODO: Remove "isGeometric" and have separate "proximity_info"s for gesture and typing. if (keyId < 0) { return 0; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h index 71e83a80c..211a79737 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h @@ -56,7 +56,7 @@ class ProximityInfoStateUtils { const std::vector<int> *const sampledLengthCache, const std::vector<int> *const sampledInputIndice, std::vector<float> *sampledSpeedRates, std::vector<float> *sampledDirections); - static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed, + static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed, const int inputSize, const int *const xCoordinates, const int *const yCoordinates, const int *times, const int sampledInputSize, const std::vector<int> *const sampledInputXs, diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 807f9b8dd..ce5a49f83 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -73,8 +73,8 @@ class DictionaryStructureWithBufferPolicy { const UnigramProperty *const unigramProperty) = 0; // Returns whether the update was success or not. - virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp) = 0; + virtual bool addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty) = 0; // Returns whether the update was success or not. virtual bool removeBigramWords(const int *const word0, const int length0, diff --git a/native/jni/src/suggest/core/result/suggestion_results.cpp b/native/jni/src/suggest/core/result/suggestion_results.cpp index 088a55f6f..6594a1292 100644 --- a/native/jni/src/suggest/core/result/suggestion_results.cpp +++ b/native/jni/src/suggest/core/result/suggestion_results.cpp @@ -16,6 +16,8 @@ #include "suggest/core/result/suggestion_results.h" +#include "utils/jni_data_utils.h" + namespace latinime { void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCount, @@ -27,13 +29,9 @@ void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCo const SuggestedWord &suggestedWord = mSuggestedWords.top(); suggestedWord.getCodePointCount(); const int start = outputIndex * MAX_WORD_LENGTH; - env->SetIntArrayRegion(outputCodePointsArray, start, suggestedWord.getCodePointCount(), - suggestedWord.getCodePoint()); - if (suggestedWord.getCodePointCount() < MAX_WORD_LENGTH) { - const int terminal = 0; - env->SetIntArrayRegion(outputCodePointsArray, start + suggestedWord.getCodePointCount(), - 1 /* len */, &terminal); - } + JniDataUtils::outputCodePoints(env, outputCodePointsArray, start, + MAX_WORD_LENGTH /* maxLength */, suggestedWord.getCodePoint(), + suggestedWord.getCodePointCount(), true /* needsNullTermination */); const int score = suggestedWord.getScore(); env->SetIntArrayRegion(outScoresArray, outputIndex, 1 /* len */, &score); const int indexToPartialCommit = suggestedWord.getIndexToPartialCommit(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp index 7d0d09631..08b4e0b5e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h index 15f924a6a..15f924a6a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp index a527f03bd..9e575858a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp @@ -85,13 +85,13 @@ bool DynamicPtUpdatingHelper::addUnigramWord( } bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, - const int probability, const int timestamp, bool *const outAddedNewBigram) { + const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { const PtNodeParams sourcePtNodeParams( mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); const PtNodeParams targetPtNodeParams( mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos)); - return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability, - timestamp, outAddedNewBigram); + return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, + bigramProperty, outAddedNewBigram); } // Remove a bigram relation from word0Pos to word1Pos. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h index 44914fe4c..f10d15a9b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h @@ -22,6 +22,7 @@ namespace latinime { +class BigramProperty; class BufferWithExtendableBuffer; class DynamicPtReadingHelper; class PtNodeReader; @@ -42,8 +43,8 @@ class DynamicPtUpdatingHelper { const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); // Add a bigram relation from word0Pos to word1Pos. - bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, - const int timestamp, bool *const outAddedNewBigram); + bool addBigramWords(const int word0Pos, const int word1Pos, + const BigramProperty *const bigramProperty, bool *const outAddedNewBigram); // Remove a bigram relation from word0Pos to word1Pos. bool removeBigramWords(const int word0Pos, const int word1Pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 91192fc57..bef401f87 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "utils/char_utils.h" namespace latinime { @@ -158,6 +159,10 @@ class PtNodeParams { return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); } + AK_FORCE_INLINE bool representsNonWordInfo() const { + return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]); + } + // Parent node position AK_FORCE_INLINE int getParentPos() const { return mParentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h index cbca3fe35..a8029f73f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -24,6 +24,7 @@ namespace latinime { +class BigramProperty; class UnigramProperty; // Interface class used to write PtNode information. @@ -70,7 +71,7 @@ class PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, + const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) = 0; virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp index 847dcdee5..91c76941c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" @@ -44,7 +44,7 @@ const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; } /* static */ int ShortcutListReadingUtils::readShortcutTarget( - const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) { + const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) { return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h index d065bf7fd..d065bf7fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h index a898e2afc..00bb502dc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h @@ -21,7 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index b3af1f47a..30dcfba37 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" +#include "utils/char_utils.h" namespace latinime { @@ -318,12 +319,15 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); - childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, - PatriciaTrieReadingUtils::isTerminal(flags), - PatriciaTrieReadingUtils::hasChildrenInFlags(flags), - PatriciaTrieReadingUtils::isBlacklisted(flags) - || PatriciaTrieReadingUtils::isNotAWord(flags), - mergedNodeCodePointCount, mergedNodeCodePoints); + // Skip PtNodes don't start with Unicode code point because they represent non-word information. + if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { + childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, + PatriciaTrieReadingUtils::isTerminal(flags), + PatriciaTrieReadingUtils::hasChildrenInFlags(flags), + PatriciaTrieReadingUtils::isBlacklisted(flags) + || PatriciaTrieReadingUtils::isNotAWord(flags), + mergedNodeCodePointCount, mergedNodeCodePoints); + } return siblingPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 85f46603e..54d1e0f6d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -22,9 +22,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" @@ -88,8 +88,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } - bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp) { + bool addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h index 6d2b4778c..8e16ccc05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h @@ -21,7 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp index 1645039d3..7a52fd180 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp @@ -14,10 +14,11 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -49,13 +50,12 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out } bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, - const int newProbability, const int timestamp, bool *const outAddedNewEntry) { + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { // 1. The word has no bigrams yet. // 2. The word has bigrams, and there is the target in the list. // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. // 4. The word has bigrams. We have to append new bigram entry to the list. // 5. Same as 4, but the list is the last entry of the content file. - if (outAddedNewEntry) { *outAddedNewEntry = false; } @@ -69,7 +69,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, - newProbability, timestamp); + bigramProperty); // Write an entry. const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { @@ -102,7 +102,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &newBigramEntry, newProbability, timestamp); + &newBigramEntry, bigramProperty); if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { return false; } @@ -128,7 +128,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry updatedBigramEntry = originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &updatedBigramEntry, newProbability, timestamp); + &updatedBigramEntry, bigramProperty); return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); } @@ -253,19 +253,19 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, } const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( - const BigramEntry *const originalBigramEntry, const int newProbability, - const int timestamp) const { + const BigramEntry *const originalBigramEntry, + const BigramProperty *const bigramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { - // Use 1 for count to indicate the bigram has inputed. - const HistoricalInfo historicalInfoForUpdate(timestamp, 0 /* level */, 1 /* count */); + const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(), + bigramProperty->getLevel(), bigramProperty->getCount()); const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalBigramEntry->getHistoricalInfo(), newProbability, + originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy); return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); } else { - return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); + return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h index c1f33359b..1613941c4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h @@ -24,6 +24,7 @@ namespace latinime { class BigramDictContent; +class BigramProperty; class HeaderPolicy; class TerminalPositionLookupTable; @@ -43,8 +44,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. } - bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, - const int timestamp, bool *const outAddedNewEntry); + bool addNewEntry(const int terminalId, const int newTargetTerminalId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); bool removeEntry(const int terminalId, const int targetTerminalId); @@ -60,7 +61,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { int *const outTailEntryPos) const; const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, - const int newProbability, const int timestamp) const; + const BigramProperty *const bigramProperty) const; bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h index fe984615c..790273541 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h @@ -19,7 +19,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index 77ed38b89..5aa6b9a92 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -56,6 +56,7 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, return false; } } + umask(S_IWGRP | S_IWOTH); if (mkdir(tmpDirPath, S_IRWXU) == -1) { AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp index 67420a252..0a435e91c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp @@ -95,4 +95,4 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce } } -} +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index cc3a24a22..f89d3d7a0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -17,13 +17,13 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/core/dictionary/property/unigram_property.h" -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -76,7 +76,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, - false /* isDeleted */, false /* willBecomeNonTerminal */); + false /* isDeleted */, false /* willBecomeNonTerminal */); int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); // Update flags. if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, @@ -223,11 +223,10 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( } bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( - const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, - bool *const outAddedNewBigram) { + const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, + const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) { + targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) { AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); return false; @@ -416,4 +415,4 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, return true; } -} +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index f20d3a241..e90bc44c0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -76,7 +76,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, + const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, bool *const outAddedNewBigram); virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 9999e0692..8373dc549 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -59,13 +59,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // valid terminal DicNode. isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } + readingHelper.readNextSiblingNode(ptNodeParams); + if (!ptNodeParams.representsNonWordInfo()) { + // Skip PtNodes that represent non-word information. + continue; + } childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); - readingHelper.readNextSiblingNode(ptNodeParams); } if (readingHelper.isError()) { mIsCorrupted = true; @@ -209,8 +213,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len } bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1, const int probability, - const int timestamp) { + const BigramProperty *const bigramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; @@ -220,9 +223,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) { + if (length0 > MAX_WORD_LENGTH + || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. " - "length0: %d, length1: %d", length0, length1); + "length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size()); return false; } const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, @@ -230,14 +234,14 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, - false /* forceLowerCaseSearch */); + const int word1Pos = getTerminalPtNodePositionOfWord( + bigramProperty->getTargetCodePoints()->data(), + bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; } bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp, - &addedNewBigram)) { + if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 8f981def5..b78576484 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -21,10 +21,10 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" @@ -93,8 +93,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool addUnigramWord(const int *const word, const int length, const UnigramProperty *const unigramProperty); - bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp); + bool addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty); bool removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 12298d967..f31c50253 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -19,9 +19,9 @@ #include <cstring> #include <queue> -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 7bc7b0a48..80970c7f8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -17,6 +17,10 @@ #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include <cstdio> +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" @@ -100,9 +104,15 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath, const BufferWithExtendableBuffer *const buffer) { - FILE *const file = fopen(filePath, "wb"); + const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + if (fd == -1) { + AKLOGE("File %s cannot be opened. errno: %d", filePath, errno); + ASSERT(false); + return false; + } + FILE *const file = fdopen(fd, "wb"); if (!file) { - AKLOGE("File %s cannot be opened.", filePath); + AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno); ASSERT(false); return false; } diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp index adc474b4c..b17e0847d 100644 --- a/native/jni/src/utils/char_utils.cpp +++ b/native/jni/src/utils/char_utils.cpp @@ -22,6 +22,9 @@ namespace latinime { +const int CharUtils::MIN_UNICODE_CODE_POINT = 0; +const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; + struct LatinCapitalSmallPair { unsigned short capital; unsigned short small; diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 239419d5b..634c45b04 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -86,12 +86,19 @@ class CharUtils { return spaceCount; } + static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) { + return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; + } + static unsigned short latin_tolower(const unsigned short c); static const std::vector<int> EMPTY_STRING; private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); + static const int MIN_UNICODE_CODE_POINT; + static const int MAX_UNICODE_CODE_POINT; + /** * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h index 2ce02dc05..0e393e315 100644 --- a/native/jni/src/utils/jni_data_utils.h +++ b/native/jni/src/utils/jni_data_utils.h @@ -65,6 +65,18 @@ class JniDataUtils { return attributeMap; } + static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, + const int maxLength, const int *const codePoints, const int codePointCount, + const bool needsNullTermination) { + const int outputCodePointCount = std::min(maxLength, codePointCount); + env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, codePoints); + if (needsNullTermination && outputCodePointCount < maxLength) { + const int terminal = 0; + env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, + 1 /* len */, &terminal); + } + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); }; |