diff options
Diffstat (limited to 'native/jni/src')
14 files changed, 103 insertions, 46 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index fb25f757c..bf917d69c 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -88,13 +88,7 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, } int Dictionary::getProbability(const int *word, int length) const { - TimeKeeper::setCurrentTime(); - int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length, - false /* forceLowerCaseSearch */); - if (NOT_A_DICT_POS == pos) { - return NOT_A_PROBABILITY; - } - return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); + return getNgramProbability(nullptr /* prevWordsInfo */, word, length); } int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const { @@ -109,18 +103,7 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; - BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( - mDictionaryStructureWithBufferPolicy.get()); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPos - && bigramsIt.getProbability() != NOT_A_PROBABILITY) { - return mDictionaryStructureWithBufferPolicy->getProbability( - mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode( - nextWordPos), bigramsIt.getProbability()); - } - } - return NOT_A_PROBABILITY; + return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsInfo, nextWordPos); } bool Dictionary::addUnigramEntry(const int *const word, const int length, diff --git a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h index 661ef1b1a..aa0d068aa 100644 --- a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h @@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy { virtual void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const pos) const = 0; - virtual void skipAllBigrams(int *const pos) const = 0; + virtual bool skipAllBigrams(int *const pos) const = 0; protected: DictionaryBigramsStructurePolicy() {} diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index a48d64473..7ad20e782 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -58,7 +58,8 @@ class DictionaryStructureWithBufferPolicy { virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0; - virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0; + virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, + const int nodePos) const = 0; virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h index 61623468e..50a4c9743 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h @@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const; - void skipAllBigrams(int *const pos) const { + bool skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + return true; } bool addNewEntry(const int terminalId, const int newTargetTerminalId, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index f478d9b91..327741065 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -131,7 +131,8 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, } } -int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { +int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, + const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } @@ -139,6 +140,18 @@ int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) c if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } + if (prevWordsInfo) { + BinaryDictionaryBigramsIterator bigramsIt = + prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == ptNodePos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); + } + } + return NOT_A_PROBABILITY; + } return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 6d97c7cc8..c80a73af7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -90,7 +90,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getProbability(const int unigramProbability, const int bigramProbability) const; - int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, + const int ptNodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp index 08b4e0b5e..f7fd5c071 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp @@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; -/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( - const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags, +/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, int *const bigramEntryPos) { + if (bufSize <= *bigramEntryPos) { + AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, " + "bigramEntryPos: %d.", bufSize, *bigramEntryPos); + return false; + } const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, bigramEntryPos); if (outBigramFlags) { @@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags if (outTargetPtNodePos) { *outTargetPtNodePos = targetPos; } + return true; } -/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, - int *const bigramListPos) { +/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, + const int bufSize, int *const bigramListPos) { BigramFlags flags; do { - getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */, - bigramListPos); + if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags, + 0 /* outTargetPtNodePos */, bigramListPos)) { + return false; + } } while(hasNext(flags)); + return true; } /* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h index 15f924a6a..10f93fb7a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h @@ -30,8 +30,8 @@ class BigramListReadWriteUtils { public: typedef uint8_t BigramFlags; - static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf, - BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, + static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf, + const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, int *const bigramEntryPos); static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { @@ -43,7 +43,8 @@ public: } // Bigrams reading methods - static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos); + static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize, + int *const bigramListPos); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h index 00bb502dc..73e291ec2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h @@ -27,27 +27,34 @@ namespace latinime { class BigramListPolicy : public DictionaryBigramsStructurePolicy { public: - explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {} + BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize) + : mBigramsBuf(bigramsBuf), mBufSize(bufSize) {} ~BigramListPolicy() {} void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const pos) const { BigramListReadWriteUtils::BigramFlags flags; - BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags, - outBigramPos, pos); + if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, + mBufSize, &flags, outBigramPos, pos)) { + AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos); + *outProbability = NOT_A_PROBABILITY; + *outHasNext = false; + return; + } *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); *outHasNext = BigramListReadWriteUtils::hasNext(flags); } - void skipAllBigrams(int *const pos) const { - BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos); + bool skipAllBigrams(int *const pos) const { + return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos); } private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy); const uint8_t *const mBigramsBuf; + const int mBufSize; }; } // namespace latinime #endif // LATINIME_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 91d76040f..b909e8268 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -21,6 +21,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -223,7 +224,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( mShortcutListPolicy.skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - mBigramListPolicy.skipAllBigrams(&pos); + if (!mBigramListPolicy.skipAllBigrams(&pos)) { + AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, + pos); + mIsCorrupted = true; + ASSERT(false); + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } } } } else { @@ -240,7 +248,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( mShortcutListPolicy.skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - mBigramListPolicy.skipAllBigrams(&pos); + if (!mBigramListPolicy.skipAllBigrams(&pos)) { + AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos); + mIsCorrupted = true; + ASSERT(false); + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } } } @@ -282,7 +296,8 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability, } } -int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { +int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, + const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } @@ -294,6 +309,18 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const // for shortcuts). return NOT_A_PROBABILITY; } + if (prevWordsInfo) { + BinaryDictionaryBigramsIterator bigramsIt = + prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == ptNodePos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); + } + } + return NOT_A_PROBABILITY; + } return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 7c0b9d3c5..1dd5705be 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2), mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()), mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), + mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeArrayReader(mDictRoot, mDictBufferSize), mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} @@ -63,7 +63,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getProbability(const int unigramProbability, const int bigramProbability) const; - int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h index 55ba613a5..4b3bb3725 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h @@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const; - void skipAllBigrams(int *const pos) const { + bool skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + return true; } bool addNewEntry(const int terminalId, const int newTargetTerminalId, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 0b5764aba..cada3d1f7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -121,7 +121,8 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, } } -int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { +int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, + const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } @@ -129,6 +130,18 @@ int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) c if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } + if (prevWordsInfo) { + BinaryDictionaryBigramsIterator bigramsIt = + prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == ptNodePos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); + } + } + return NOT_A_PROBABILITY; + } return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 85929b785..b0f16cd01 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -72,7 +72,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getProbability(const int unigramProbability, const int bigramProbability) const; - int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const; |