diff options
12 files changed, 58 insertions, 27 deletions
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index ad14c06ef..162a209e3 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -231,7 +231,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { // Don't add single letter words, possibly confuses // capitalization of i. final int wordLen = StringUtils.codePointCount(word); - if (wordLen < MAX_WORD_LENGTH && wordLen > 1) { + if (wordLen <= MAX_WORD_LENGTH && wordLen > 1) { if (DEBUG) { Log.d(TAG, "addName " + name + ", " + word + ", " + prevWordsInfo); } diff --git a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java index debaad13e..21014b378 100644 --- a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java @@ -253,12 +253,12 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { final int frequency = cursor.getInt(indexFrequency); final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency); // Safeguard against adding really long words. - if (word.length() < MAX_WORD_LENGTH) { + if (word.length() <= MAX_WORD_LENGTH) { runGCIfRequiredLocked(true /* mindsBlockByGC */); addUnigramLocked(word, adjustedFrequency, null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */, false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); - if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) { + if (null != shortcut && shortcut.length() <= MAX_WORD_LENGTH) { runGCIfRequiredLocked(true /* mindsBlockByGC */); addUnigramLocked(shortcut, adjustedFrequency, word, USER_DICT_SHORTCUT_FREQUENCY, true /* isNotAWord */, diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java index 8e027e4f9..34d4d4ed7 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java @@ -62,8 +62,8 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid, final int timestamp, final DistracterFilter distracterFilter) { final CharSequence prevWord = prevWordsInfo.mPrevWordsInfo[0].mWord; - if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH || - (prevWord != null && prevWord.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) { + if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH || + (prevWord != null && prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) { return; } final int frequency = isValid ? diff --git a/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java b/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java index d76ea10c0..197908032 100644 --- a/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java @@ -386,8 +386,7 @@ public class DictionaryInfoUtils { final SpacingAndPunctuations spacingAndPunctuations) { if (TextUtils.isEmpty(text)) return false; final int length = text.length(); - // TODO: Make this test "length > Constants.DICTIONARY_MAX_WORD_LENGTH". - if (length >= Constants.DICTIONARY_MAX_WORD_LENGTH) { + if (length > Constants.DICTIONARY_MAX_WORD_LENGTH) { return false; } int i = 0; diff --git a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h index 661ef1b1a..aa0d068aa 100644 --- a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h @@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy { virtual void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const pos) const = 0; - virtual void skipAllBigrams(int *const pos) const = 0; + virtual bool skipAllBigrams(int *const pos) const = 0; protected: DictionaryBigramsStructurePolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h index 61623468e..50a4c9743 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h @@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const; - void skipAllBigrams(int *const pos) const { + bool skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + return true; } bool addNewEntry(const int terminalId, const int newTargetTerminalId, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp index 08b4e0b5e..f7fd5c071 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp @@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; -/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( - const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags, +/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, int *const bigramEntryPos) { + if (bufSize <= *bigramEntryPos) { + AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, " + "bigramEntryPos: %d.", bufSize, *bigramEntryPos); + return false; + } const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, bigramEntryPos); if (outBigramFlags) { @@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags if (outTargetPtNodePos) { *outTargetPtNodePos = targetPos; } + return true; } -/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, - int *const bigramListPos) { +/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, + const int bufSize, int *const bigramListPos) { BigramFlags flags; do { - getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */, - bigramListPos); + if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags, + 0 /* outTargetPtNodePos */, bigramListPos)) { + return false; + } } while(hasNext(flags)); + return true; } /* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h index 15f924a6a..10f93fb7a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h @@ -30,8 +30,8 @@ class BigramListReadWriteUtils { public: typedef uint8_t BigramFlags; - static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf, - BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, + static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf, + const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, int *const bigramEntryPos); static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { @@ -43,7 +43,8 @@ public: } // Bigrams reading methods - static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos); + static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize, + int *const bigramListPos); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h index 00bb502dc..73e291ec2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h @@ -27,27 +27,34 @@ namespace latinime { class BigramListPolicy : public DictionaryBigramsStructurePolicy { public: - explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {} + BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize) + : mBigramsBuf(bigramsBuf), mBufSize(bufSize) {} ~BigramListPolicy() {} void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const pos) const { BigramListReadWriteUtils::BigramFlags flags; - BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags, - outBigramPos, pos); + if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, + mBufSize, &flags, outBigramPos, pos)) { + AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos); + *outProbability = NOT_A_PROBABILITY; + *outHasNext = false; + return; + } *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); *outHasNext = BigramListReadWriteUtils::hasNext(flags); } - void skipAllBigrams(int *const pos) const { - BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos); + bool skipAllBigrams(int *const pos) const { + return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos); } private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy); const uint8_t *const mBigramsBuf; + const int mBufSize; }; } // namespace latinime #endif // LATINIME_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 91d76040f..531cbb710 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -223,7 +223,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( mShortcutListPolicy.skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - mBigramListPolicy.skipAllBigrams(&pos); + if (!mBigramListPolicy.skipAllBigrams(&pos)) { + AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, + pos); + mIsCorrupted = true; + ASSERT(false); + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } } } } else { @@ -240,7 +247,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( mShortcutListPolicy.skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - mBigramListPolicy.skipAllBigrams(&pos); + if (!mBigramListPolicy.skipAllBigrams(&pos)) { + AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos); + mIsCorrupted = true; + ASSERT(false); + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 7c0b9d3c5..d40b9af8d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2), mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()), mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), + mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeArrayReader(mDictRoot, mDictBufferSize), mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h index 55ba613a5..4b3bb3725 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h @@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const; - void skipAllBigrams(int *const pos) const { + bool skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + return true; } bool addNewEntry(const int terminalId, const int newTargetTerminalId, |