diff options
Diffstat (limited to 'native/jni/src')
14 files changed, 35 insertions, 69 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 0bcde2294..228260216 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -145,10 +145,11 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints, codePoints, codePointCount); } -int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) { +int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { TimeKeeper::setCurrentTime(); return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken( - token, outCodePoints); + token, outCodePoints, outCodePointCount); } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 542ba7291..247ee2421 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -103,7 +103,8 @@ class Dictionary { // Method to iterate all words in the dictionary. // The returned token has to be used to get the next word. If token is 0, this method newly // starts iterating the dictionary. - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy.get(); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index e2771f97c..b72601109 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -104,7 +104,8 @@ class DictionaryStructureWithBufferPolicy { // Method to iterate all words in the dictionary. // The returned token has to be used to get the next word. If token is 0, this method newly // starts iterating the dictionary. - virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0; + virtual int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) = 0; virtual bool isCorrupted() const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 4ac0f406e..9780ae048 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -478,10 +478,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code return WordProperty(&codePointVector, &unigramProperty, &bigrams); } -int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { - // TODO: Return code point count like other methods. - // Null termination. - outCodePoints[0] = 0; +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { + *outCodePointCount = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( @@ -498,13 +497,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); - if (codePointCount < MAX_WORD_LENGTH) { - // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH - // code points. - outCodePoints[codePointCount] = 0; - } const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 2e948ac4a..16b1bd2c1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -134,7 +134,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); bool isCorrupted() const { return mIsCorrupted; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp index 1f00fc6ab..db1a802d0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp @@ -65,7 +65,7 @@ bool DynamicPtGcEventListeners bool DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) { + if (!ptNodeParams->isDeleted()) { int bigramEntryCount = 0; if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams, &bigramEntryCount)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp index e77d39b8c..f31c914d2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp @@ -270,8 +270,8 @@ const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) const { const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( - isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(), - originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */, + isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, + false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, probability); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 7e1f3b233..5c62b9caf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -391,7 +391,9 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin return WordProperty(&codePointVector, &unigramProperty, &bigrams); } -int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { +int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { + *outCodePointCount = 0; if (token == 0) { // Start iterating the dictionary. mTerminalPtNodePositionsForIteratingWords.clear(); @@ -409,8 +411,8 @@ int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outC } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, - outCodePoints, &unigramProbability); + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, + MAX_WORD_LENGTH, outCodePoints, &unigramProbability); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index dce94363a..ec8407408 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -137,7 +137,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); bool isCorrupted() const { return mIsCorrupted; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index f89d3d7a0..3d8da9173 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -231,14 +231,6 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); return false; } - if (!sourcePtNodeParams->hasBigrams()) { - // Update has bigrams flag. - return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(), - sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(), - sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(), - true /* hasBigrams */, - sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); - } return true; } @@ -303,28 +295,9 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId()); return false; } - if (!ptNodeParams->hasShortcutTargets()) { - // Update has shortcut targets flag. - return updatePtNodeFlags(ptNodeParams->getHeadPos(), - ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), - ptNodeParams->isTerminal(), true /* hasShortcutTargets */, - ptNodeParams->hasBigrams(), - ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); - } return true; } -bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags( - const PtNodeParams *const ptNodeParams) { - const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos( - ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; - const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos( - ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; - return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(), - ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets, - hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); -} - bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( const PtNodeParams *const ptNodeParams, int *const outTerminalId, int *const ptNodeWritingPos) { @@ -377,8 +350,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( return false; } return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), - isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), - ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); + isTerminal, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); } const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( @@ -402,11 +374,11 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, const bool isTerminal, - const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) { + const bool hasMultipleChars) { // Create node flags and write them. PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal, - hasShortcutTargets, hasBigrams, hasMultipleChars, + false /* hasShortcutTargets */, false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE); if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) { AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index e90bc44c0..162dc9b1d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -93,8 +93,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const int *const targetCodePoints, const int targetCodePointCount, const int shortcutProbability); - bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams); - private: DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); @@ -110,8 +108,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const UnigramProperty *const unigramProperty) const; bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, - const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, - const bool hasMultipleChars); + const bool isTerminal, const bool hasMultipleChars); static const int CHILDREN_POSITION_FIELD_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index f7f2a32b4..46107d92a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -489,10 +489,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code return WordProperty(&codePointVector, &unigramProperty, &bigrams); } -int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { - // TODO: Return code point count like other methods. - // Null termination. - outCodePoints[0] = 0; +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { + *outCodePointCount = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( @@ -509,13 +508,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); - if (codePointCount < MAX_WORD_LENGTH) { - // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH - // code points. - outCodePoints[codePointCount] = 0; - } const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 0a20965f3..5d66a2cce 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -113,7 +113,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); bool isCorrupted() const { return mIsCorrupted; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 3eedcf2b8..40fdfa068 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -286,8 +286,9 @@ bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTermi } if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) { AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second); + return false; } - return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams); + return true; } } // namespace latinime |