diff options
10 files changed, 39 insertions, 26 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 9bf017578..4ad5ba65e 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -235,7 +235,7 @@ public final class Suggest { SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer); // For some reason some suggestions with MIN_VALUE are making their way here. - // TODO: Find a more robust way to detect distractors. + // TODO: Find a more robust way to detect distracters. for (int i = suggestionsContainer.size() - 1; i >= 0; --i) { if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) { suggestionsContainer.remove(i); diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java index 1db525502..f8a845304 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java @@ -64,9 +64,9 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr private final Object mLock = new Object(); // If the score of the top suggestion exceeds this value, the tested word (e.g., - // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distractor to + // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to // words in dictionary. The greater the threshold is, the less likely the tested word would - // become a distractor, which means the tested word will be more likely to be added to + // become a distracter, which means the tested word will be more likely to be added to // the dictionary. private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f; @@ -196,7 +196,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr } final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */); if (Word) { - // Valid word is not a distractor. + // Valid word is not a distracter. if (DEBUG) { Log.d(TAG, "isDistracter: false (valid word)"); } @@ -257,12 +257,12 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr return false; } final SuggestedWordInfo firstSuggestion = suggestionResults.first(); - final boolean isDistractor = suggestionExceedsDistracterThreshold( + final boolean isDistracter = suggestionExceedsDistracterThreshold( firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); if (DEBUG) { - Log.d(TAG, "isDistracter: " + isDistractor); + Log.d(TAG, "isDistracter: " + isDistracter); } - return isDistractor; + return isDistracter; } private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index 0675de6fa..85d6d434d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -167,6 +167,14 @@ int LanguageModelDictContent::createAndGetBitmapEntryIndex(const WordIdArrayView if (lastBitmapEntryIndex == TrieMap::INVALID_INDEX) { return TrieMap::INVALID_INDEX; } + const int oldestPrevWordId = prevWordIds[prevWordIds.size() - 1]; + const TrieMap::Result result = mTrieMap.get(oldestPrevWordId, lastBitmapEntryIndex); + if (!result.mIsValid) { + if (!mTrieMap.put(oldestPrevWordId, + ProbabilityEntry().encode(mHasHistoricalInfo), lastBitmapEntryIndex)) { + return TrieMap::INVALID_INDEX; + } + } return mTrieMap.getNextLevelBitmapEntryIndex(prevWordIds[prevWordIds.size() - 1], lastBitmapEntryIndex); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h index 3dfaba755..f1bf12cb2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h @@ -36,7 +36,8 @@ class ProbabilityEntry { // Dummy entry ProbabilityEntry() - : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {} + : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY), + mHistoricalInfo() {} // Entry without historical information ProbabilityEntry(const int flags, const int probability) @@ -61,7 +62,7 @@ class ProbabilityEntry { bigramProperty->getCount()) {} bool isValid() const { - return (mProbability != NOT_A_PROBABILITY) || hasHistoricalInfo(); + return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0; } bool hasHistoricalInfo() const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 9acf2d44f..39822b94a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -53,6 +53,7 @@ const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1; const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1; const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1; +const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 97035311e..dfcdd4d6f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -51,6 +51,7 @@ class Ver4DictConstants { static const int WORD_COUNT_FIELD_SIZE; // Flags in probability entry. static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE; + static const uint8_t FLAG_NOT_A_VALID_ENTRY; static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE; static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index b808c904d..2af218ab6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -120,16 +120,15 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( const int ptNodePos = mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); - // TODO: Support n-gram. const int probability = mBuffers->getLanguageModelDictContent()->getWordProbability( - prevWordIds.limit(1 /* maxSize */), wordId, mHeaderPolicy); + prevWordIds, wordId, mHeaderPolicy); return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), probability == 0); } int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const { - if (wordId == NOT_A_WORD_ID) { + if (wordId == NOT_A_WORD_ID || prevWordIds.contains(NOT_A_WORD_ID)) { return NOT_A_PROBABILITY; } const int ptNodePos = @@ -138,10 +137,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } - // TODO: Support n-gram. const ProbabilityEntry probabilityEntry = - mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry( - prevWordIds.limit(1 /* maxSize */), wordId); + mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(prevWordIds, wordId); if (!probabilityEntry.isValid()) { return NOT_A_PROBABILITY; } @@ -164,16 +161,18 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI if (prevWordIds.empty()) { return; } - // TODO: Support n-gram. const auto languageModelDictContent = mBuffers->getLanguageModelDictContent(); - for (const auto entry : languageModelDictContent->getProbabilityEntries( - prevWordIds.limit(1 /* maxSize */))) { - const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry(); - const int probability = probabilityEntry.hasHistoricalInfo() ? - ForgettingCurveUtils::decodeProbability( - probabilityEntry.getHistoricalInfo(), mHeaderPolicy) : - probabilityEntry.getProbability(); - listener->onVisitEntry(probability, entry.getWordId()); + for (size_t i = 1; i <= prevWordIds.size(); ++i) { + for (const auto entry : languageModelDictContent->getProbabilityEntries( + prevWordIds.limit(i))) { + const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry(); + const int probability = probabilityEntry.hasHistoricalInfo() ? + ForgettingCurveUtils::decodeProbability( + probabilityEntry.getHistoricalInfo(), mHeaderPolicy) + + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */) : + probabilityEntry.getProbability(); + listener->onVisitEntry(probability, entry.getWordId()); + } } } diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp index c5849d054..06f82df52 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp @@ -29,7 +29,7 @@ namespace { TEST(LanguageModelDictContentTest, TestUnigramProbability) { LanguageModelDictContent languageModelDictContent(false /* useHistoricalInfo */); - const int flag = 0xFF; + const int flag = 0xF0; const int probability = 10; const int wordId = 100; const ProbabilityEntry probabilityEntry(flag, probability); diff --git a/tests/src/com/android/inputmethod/latin/InputTestsBase.java b/tests/src/com/android/inputmethod/latin/InputTestsBase.java index d7a649a5b..6860bea45 100644 --- a/tests/src/com/android/inputmethod/latin/InputTestsBase.java +++ b/tests/src/com/android/inputmethod/latin/InputTestsBase.java @@ -183,6 +183,9 @@ public class InputTestsBase extends ServiceTestCase<LatinIMEForTests> { | InputType.TYPE_TEXT_FLAG_MULTI_LINE; mEditText.setInputType(inputType); mEditText.setEnabled(true); + if (null == Looper.myLooper()) { + Looper.prepare(); + } setupService(); mLatinIME = getService(); setDebugMode(true); diff --git a/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java index 5fbd36ac7..6ed912088 100644 --- a/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java +++ b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java @@ -57,7 +57,7 @@ public class DistracterFilterTest extends AndroidTestCase { mDistracterFilter.close(); } - public void testIsDistractorToWordsInDictionaries() { + public void testIsDistracterToWordsInDictionaries() { final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; final Locale localeEnUs = new Locale("en", "US"); |