diff options
5 files changed, 124 insertions, 54 deletions
diff --git a/java/res/values-lo-rLA/strings-letter-descriptions.xml b/java/res/values-lo-rLA/strings-letter-descriptions.xml index ecc0b7a71..47f7cbc81 100644 --- a/java/res/values-lo-rLA/strings-letter-descriptions.xml +++ b/java/res/values-lo-rLA/strings-letter-descriptions.xml @@ -186,7 +186,7 @@ <string name="spoken_symbol_2019" msgid="8892530161598134083">"Right single quotation mark"</string> <string name="spoken_symbol_201A" msgid="2072987157683446644">"Single low-9 quotation mark"</string> <string name="spoken_symbol_201C" msgid="4588048378803665427">"ເຄື່ອງໝາຍວົງຢືມຊ້າຍ"</string> - <string name="spoken_symbol_201D" msgid="1642776849495925895">"ເຄື່ອງໝາຍວົງຢືມຂວາ"</string> + <string name="spoken_symbol_201D" msgid="1642776849495925895">"ເຄື່ອງໝາຍວົງຢືມຂວາ"</string> <string name="spoken_symbol_2020" msgid="9084628638189344431">"Dagger"</string> <string name="spoken_symbol_2021" msgid="5081396468559426475">"Double dagger"</string> <string name="spoken_symbol_2030" msgid="9068837172419431755">"ເຄື່ອງໝາຍຕໍ່ໄມລ໌"</string> diff --git a/java/res/values-my-rMM/strings-letter-descriptions.xml b/java/res/values-my-rMM/strings-letter-descriptions.xml index 2d5338b86..d904f53f6 100644 --- a/java/res/values-my-rMM/strings-letter-descriptions.xml +++ b/java/res/values-my-rMM/strings-letter-descriptions.xml @@ -29,7 +29,7 @@ <string name="spoken_accented_letter_00AA" msgid="4374325261868451570">"ဣထိလိင် အစဉ်ပြ အညွှန်း"</string> <string name="spoken_accented_letter_00B5" msgid="9031387673828823891">"မိုက်ခရို သင်္ကေတ"</string> <string name="spoken_accented_letter_00BA" msgid="5045198452071207437">"ပုလိင် အစဉ်ပြ အညွှန်း"</string> - <string name="spoken_accented_letter_00DF" msgid="2260098367028134281">"ပြတ်သားသည့် S"</string> + <string name="spoken_accented_letter_00DF" msgid="2260098367028134281">"ပြတ်သားသည့် S"</string> <string name="spoken_accented_letter_00E0" msgid="2234515772182387086">"A၊ တည်ငြိမ်သော"</string> <string name="spoken_accented_letter_00E1" msgid="7780174500802535063">"A၊ စူးရှသော"</string> <string name="spoken_accented_letter_00E2" msgid="7054108480488102631">"A၊ သရသံသင်္ကေတ"</string> @@ -133,45 +133,45 @@ <string name="spoken_accented_letter_0259" msgid="2464085263158415898">"Schwa"</string> <string name="spoken_accented_letter_1EA1" msgid="688124877202887630">"A၊ အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EA3" msgid="327960130366386256">"A အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1EA5" msgid="637406363453769610">"A၊ သရသံသင်္ကေတ နှင့် စူးရှသော"</string> - <string name="spoken_accented_letter_1EA7" msgid="1419591804181615409">"A၊ သရသံသင်္ကေတ နှင့် တည်ငြိမ်သော"</string> - <string name="spoken_accented_letter_1EA9" msgid="6068887382734896756">"A၊ သရသံသင်္ကေတ နှင့် အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1EAB" msgid="7236523151662538333">"A၊ သရသံသင်္ကေတ နှင့် tilde"</string> - <string name="spoken_accented_letter_1EAD" msgid="2363364864106332076">"A,၊ သရသံသင်္ကေတ နှင့် အောက်မှာ အစက်"</string> - <string name="spoken_accented_letter_1EAF" msgid="1576329511464272935">"A၊ တည်ငြိမ်သော နှင့် စူးရှသော"</string> - <string name="spoken_accented_letter_1EB1" msgid="4634735072816076592">"A၊ breve နှင့် တည်ငြိမ်သော"</string> - <string name="spoken_accented_letter_1EB3" msgid="2325245849038771534">"A၊ breve နှင့် အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1EB5" msgid="3720427596242746295">"A၊ breve နှင့် tilde"</string> - <string name="spoken_accented_letter_1EB7" msgid="700415535653646695">"A၊ breve နှင့် အောက်မှာ အစက်"</string> + <string name="spoken_accented_letter_1EA5" msgid="637406363453769610">"A၊ သရသံသင်္ကေတ နှင့် စူးရှသော"</string> + <string name="spoken_accented_letter_1EA7" msgid="1419591804181615409">"A၊ သရသံသင်္ကေတ နှင့် တည်ငြိမ်သော"</string> + <string name="spoken_accented_letter_1EA9" msgid="6068887382734896756">"A၊ သရသံသင်္ကေတ နှင့် အပေါ်မှာ ချိတ်"</string> + <string name="spoken_accented_letter_1EAB" msgid="7236523151662538333">"A၊ သရသံသင်္ကေတ နှင့် tilde"</string> + <string name="spoken_accented_letter_1EAD" msgid="2363364864106332076">"A,၊ သရသံသင်္ကေတ နှင့် အောက်မှာ အစက်"</string> + <string name="spoken_accented_letter_1EAF" msgid="1576329511464272935">"A၊ တည်ငြိမ်သော နှင့် စူးရှသော"</string> + <string name="spoken_accented_letter_1EB1" msgid="4634735072816076592">"A၊ breve နှင့် တည်ငြိမ်သော"</string> + <string name="spoken_accented_letter_1EB3" msgid="2325245849038771534">"A၊ breve နှင့် အပေါ်မှာ ချိတ်"</string> + <string name="spoken_accented_letter_1EB5" msgid="3720427596242746295">"A၊ breve နှင့် tilde"</string> + <string name="spoken_accented_letter_1EB7" msgid="700415535653646695">"A၊ breve နှင့် အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EB9" msgid="3901338692305890487">"E၊ အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EBB" msgid="4028688699415417302">"E၊ အပေါ်မှာ ချိတ်"</string> <string name="spoken_accented_letter_1EBD" msgid="181253633045931897">"E၊ tilde"</string> - <string name="spoken_accented_letter_1EBF" msgid="3309618845007944963">"E၊ သရသံသင်္ကေတ နှင့် စူးရှသော"</string> - <string name="spoken_accented_letter_1EC1" msgid="8139046749226332542">"E၊ သရသံသင်္ကေတ နှင့် တည်ငြိမ်သော"</string> - <string name="spoken_accented_letter_1EC3" msgid="3239674223053133383">"E၊ သရသံသင်္ကေတ နှင့် ချိတ် အပေါ်မှာ"</string> - <string name="spoken_accented_letter_1EC5" msgid="2216559244705714587">"E၊ သရသံသင်္ကေတ နှင့် tilde"</string> - <string name="spoken_accented_letter_1EC7" msgid="9012731468253986792">"E၊ သရသံသင်္ကေတ နှင့် အောက်မှာ အစက်"</string> + <string name="spoken_accented_letter_1EBF" msgid="3309618845007944963">"E၊ သရသံသင်္ကေတ နှင့် စူးရှသော"</string> + <string name="spoken_accented_letter_1EC1" msgid="8139046749226332542">"E၊ သရသံသင်္ကေတ နှင့် တည်ငြိမ်သော"</string> + <string name="spoken_accented_letter_1EC3" msgid="3239674223053133383">"E၊ သရသံသင်္ကေတ နှင့် ချိတ် အပေါ်မှာ"</string> + <string name="spoken_accented_letter_1EC5" msgid="2216559244705714587">"E၊ သရသံသင်္ကေတ နှင့် tilde"</string> + <string name="spoken_accented_letter_1EC7" msgid="9012731468253986792">"E၊ သရသံသင်္ကေတ နှင့် အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EC9" msgid="2901917620195717034">"I၊ အပေါ်မှာ ချိတ်"</string> <string name="spoken_accented_letter_1ECB" msgid="5470387489820034621">"I၊ အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1ECD" msgid="1340122876914839806">"O၊ အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1ECF" msgid="2326921263882559755">"O၊ အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1ED1" msgid="2885683296042774958">"O၊ သရသံသင်္ကေတ နှင့် စူးရှသော"</string> - <string name="spoken_accented_letter_1ED3" msgid="6857664926477376178">"O၊ သရသံသင်္ကေတ နှင့် တည်ငြိမ်သော"</string> - <string name="spoken_accented_letter_1ED5" msgid="2015209467290624062">"O၊ သရသံသင်္ကေတ နှင့် အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1ED7" msgid="7924481354960306389">"O၊ သရသံသင်္ကေတ နှင့် tilde"</string> - <string name="spoken_accented_letter_1ED9" msgid="7023315590332365554">"O၊ သရသံသင်္ကေတ နှင့် အောက်မှာ အစက်"</string> - <string name="spoken_accented_letter_1EDB" msgid="2379438944917634496">"O၊ horn နှင့် စူးရှသော"</string> - <string name="spoken_accented_letter_1EDD" msgid="8107077534204404085">"O၊ horn နှင့် တည်ငြိမ်သော"</string> - <string name="spoken_accented_letter_1EDF" msgid="1846880105528347966">"O၊ horn နှင့် အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1EE1" msgid="1520037313389776718">"O၊ horn နှင့် tilde"</string> - <string name="spoken_accented_letter_1EE3" msgid="907964027171008963">"O၊ horn နှင့် အောက်မှာ အစက်"</string> + <string name="spoken_accented_letter_1ED1" msgid="2885683296042774958">"O၊ သရသံသင်္ကေတ နှင့် စူးရှသော"</string> + <string name="spoken_accented_letter_1ED3" msgid="6857664926477376178">"O၊ သရသံသင်္ကေတ နှင့် တည်ငြိမ်သော"</string> + <string name="spoken_accented_letter_1ED5" msgid="2015209467290624062">"O၊ သရသံသင်္ကေတ နှင့် အပေါ်မှာ ချိတ်"</string> + <string name="spoken_accented_letter_1ED7" msgid="7924481354960306389">"O၊ သရသံသင်္ကေတ နှင့် tilde"</string> + <string name="spoken_accented_letter_1ED9" msgid="7023315590332365554">"O၊ သရသံသင်္ကေတ နှင့် အောက်မှာ အစက်"</string> + <string name="spoken_accented_letter_1EDB" msgid="2379438944917634496">"O၊ horn နှင့် စူးရှသော"</string> + <string name="spoken_accented_letter_1EDD" msgid="8107077534204404085">"O၊ horn နှင့် တည်ငြိမ်သော"</string> + <string name="spoken_accented_letter_1EDF" msgid="1846880105528347966">"O၊ horn နှင့် အပေါ်မှာ ချိတ်"</string> + <string name="spoken_accented_letter_1EE1" msgid="1520037313389776718">"O၊ horn နှင့် tilde"</string> + <string name="spoken_accented_letter_1EE3" msgid="907964027171008963">"O၊ horn နှင့် အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EE5" msgid="1522024630360038700">"U၊ အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EE7" msgid="7815412228302952637">"U၊ အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1EE9" msgid="4219119671251485651">"U၊ horn နှင့် စူးရှသော"</string> - <string name="spoken_accented_letter_1EEB" msgid="4086009841269002231">"U၊ horn နှင့် တည်ငြိမ်သော"</string> - <string name="spoken_accented_letter_1EED" msgid="3528151733528719847">"U၊ horn နှင့် အပေါ်မှာ ချိတ်"</string> - <string name="spoken_accented_letter_1EEF" msgid="3508548229409072119">"U၊ horn နှင့် tilde"</string> - <string name="spoken_accented_letter_1EF1" msgid="1912816350401931115">"U၊ horn နှင့် အောက်မှာ အစက်"</string> + <string name="spoken_accented_letter_1EE9" msgid="4219119671251485651">"U၊ horn နှင့် စူးရှသော"</string> + <string name="spoken_accented_letter_1EEB" msgid="4086009841269002231">"U၊ horn နှင့် တည်ငြိမ်သော"</string> + <string name="spoken_accented_letter_1EED" msgid="3528151733528719847">"U၊ horn နှင့် အပေါ်မှာ ချိတ်"</string> + <string name="spoken_accented_letter_1EEF" msgid="3508548229409072119">"U၊ horn နှင့် tilde"</string> + <string name="spoken_accented_letter_1EF1" msgid="1912816350401931115">"U၊ horn နှင့် အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EF3" msgid="7211760439435946494">"Y၊ တည်ငြိမ်သော"</string> <string name="spoken_accented_letter_1EF5" msgid="8998864482764007384">"Y၊ အောက်မှာ အစက်"</string> <string name="spoken_accented_letter_1EF7" msgid="922043627252869200">"Y၊ အပေါ်မှာ ချိတ်"</string> diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index a7296a302..c4297f5d6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -270,16 +270,26 @@ int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWord } bool LanguageModelDictContent::updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, - const int level, const HeaderPolicy *const headerPolicy, int *const outEntryCounts) { + const int prevWordCount, const HeaderPolicy *const headerPolicy, + int *const outEntryCounts) { for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) { - if (level > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { - AKLOGE("Invalid level. level: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.", - level, MAX_PREV_WORD_COUNT_FOR_N_GRAM); + if (prevWordCount > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + AKLOGE("Invalid prevWordCount. prevWordCount: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.", + prevWordCount, MAX_PREV_WORD_COUNT_FOR_N_GRAM); return false; } const ProbabilityEntry probabilityEntry = ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo); - if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence()) { + if (prevWordCount > 0 && probabilityEntry.isValid() + && !mTrieMap.getRoot(entry.key()).mIsValid) { + // The entry is related to a word that has been removed. Remove the entry. + if (!mTrieMap.remove(entry.key(), bitmapEntryIndex)) { + return false; + } + continue; + } + if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence() + && probabilityEntry.isValid()) { const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( probabilityEntry.getHistoricalInfo(), headerPolicy); if (ForgettingCurveUtils::needsToKeep(&historicalInfo, headerPolicy)) { @@ -298,13 +308,13 @@ bool LanguageModelDictContent::updateAllProbabilityEntriesForGCInner(const int b } } if (!probabilityEntry.representsBeginningOfSentence()) { - outEntryCounts[level] += 1; + outEntryCounts[prevWordCount] += 1; } if (!entry.hasNextLevelMap()) { continue; } - if (!updateAllProbabilityEntriesForGCInner(entry.getNextLevelBitmapEntryIndex(), level + 1, - headerPolicy, outEntryCounts)) { + if (!updateAllProbabilityEntriesForGCInner(entry.getNextLevelBitmapEntryIndex(), + prevWordCount + 1, headerPolicy, outEntryCounts)) { return false; } } @@ -332,7 +342,7 @@ bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel( for (int i = 0; i < entryCountToRemove; ++i) { const EntryInfoToTurncate &entryInfo = entryInfoVector[i]; if (!removeNgramProbabilityEntry( - WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mEntryLevel), entryInfo.mKey)) { + WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mPrevWordCount), entryInfo.mKey)) { return false; } } @@ -342,9 +352,9 @@ bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel( bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel, const int bitmapEntryIndex, std::vector<int> *const prevWordIds, std::vector<EntryInfoToTurncate> *const outEntryInfo) const { - const int currentLevel = prevWordIds->size(); + const int prevWordCount = prevWordIds->size(); for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) { - if (currentLevel < targetLevel) { + if (prevWordCount < targetLevel) { if (!entry.hasNextLevelMap()) { continue; } @@ -379,10 +389,10 @@ bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()( if (left.mKey != right.mKey) { return left.mKey < right.mKey; } - if (left.mEntryLevel != right.mEntryLevel) { - return left.mEntryLevel > right.mEntryLevel; + if (left.mPrevWordCount != right.mPrevWordCount) { + return left.mPrevWordCount > right.mPrevWordCount; } - for (int i = 0; i < left.mEntryLevel; ++i) { + for (int i = 0; i < left.mPrevWordCount; ++i) { if (left.mPrevWordIds[i] != right.mPrevWordIds[i]) { return left.mPrevWordIds[i] < right.mPrevWordIds[i]; } @@ -392,9 +402,10 @@ bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()( } LanguageModelDictContent::EntryInfoToTurncate::EntryInfoToTurncate(const int probability, - const int timestamp, const int key, const int entryLevel, const int *const prevWordIds) - : mProbability(probability), mTimestamp(timestamp), mKey(key), mEntryLevel(entryLevel) { - memmove(mPrevWordIds, prevWordIds, mEntryLevel * sizeof(mPrevWordIds[0])); + const int timestamp, const int key, const int prevWordCount, const int *const prevWordIds) + : mProbability(probability), mTimestamp(timestamp), mKey(key), + mPrevWordCount(prevWordCount) { + memmove(mPrevWordIds, prevWordIds, mPrevWordCount * sizeof(mPrevWordIds[0])); } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h index 834cf933d..51ef090e1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -160,7 +160,7 @@ class LanguageModelDictContent { outEntryCounts[i] = 0; } return updateAllProbabilityEntriesForGCInner(mTrieMap.getRootBitmapEntryIndex(), - 0 /* level */, headerPolicy, outEntryCounts); + 0 /* prevWordCount */, headerPolicy, outEntryCounts); } // entryCounts should be created by updateAllProbabilityEntries. @@ -185,12 +185,12 @@ class LanguageModelDictContent { }; EntryInfoToTurncate(const int probability, const int timestamp, const int key, - const int entryLevel, const int *const prevWordIds); + const int prevWordCount, const int *const prevWordIds); int mProbability; int mTimestamp; int mKey; - int mEntryLevel; + int mPrevWordCount; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; private: @@ -208,7 +208,7 @@ class LanguageModelDictContent { int *const outNgramCount); int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds); int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const; - bool updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, const int level, + bool updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, const int prevWordCount, const HeaderPolicy *const headerPolicy, int *const outEntryCounts); bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel, int *const outEntryCount); diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index 0e58b7211..fa70f9988 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -75,6 +75,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { return formatVersion > FormatSpec.VERSION401; } + private static boolean supportsNgram(final int formatVersion) { + return formatVersion >= FormatSpec.VERSION4_DEV; + } + private void onInputWord(final BinaryDictionary binaryDictionary, final String word, final boolean isValidWord) { binaryDictionary.updateEntriesForWordWithNgramContext(NgramContext.EMPTY_PREV_WORDS_INFO, @@ -88,6 +92,14 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { mCurrentTime /* timestamp */); } + private void onInputWordWithPrevWords(final BinaryDictionary binaryDictionary, + final String word, final boolean isValidWord, final String prevWord, + final String prevPrevWord) { + binaryDictionary.updateEntriesForWordWithNgramContext( + new NgramContext(new WordInfo(prevWord), new WordInfo(prevPrevWord)), word, + isValidWord, 1 /* count */, mCurrentTime /* timestamp */); + } + private void onInputWordWithBeginningOfSentenceContext( final BinaryDictionary binaryDictionary, final String word, final boolean isValidWord) { binaryDictionary.updateEntriesForWordWithNgramContext(NgramContext.BEGINNING_OF_SENTENCE, @@ -99,6 +111,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1); } + private static boolean isValidTrigram(final BinaryDictionary binaryDictionary, + final String word0, final String word1, final String word2) { + return binaryDictionary.isValidNgram( + new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2); + } + private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { // 30 days. final int timeToElapse = (int)TimeUnit.SECONDS.convert(30, TimeUnit.DAYS); @@ -256,7 +274,23 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { onInputWordWithPrevWord(binaryDictionary, "y", true /* isValidWord */, "x"); assertFalse(isValidBigram(binaryDictionary, "x", "y")); - binaryDictionary.close(); + if (!supportsNgram(formatVersion)) { + return; + } + + onInputWordWithPrevWords(binaryDictionary, "c", false /* isValidWord */, "b", "a"); + assertFalse(isValidTrigram(binaryDictionary, "a", "b", "c")); + assertFalse(isValidBigram(binaryDictionary, "b", "c")); + onInputWordWithPrevWords(binaryDictionary, "c", false /* isValidWord */, "b", "a"); + assertTrue(isValidTrigram(binaryDictionary, "a", "b", "c")); + assertTrue(isValidBigram(binaryDictionary, "b", "c")); + + onInputWordWithPrevWords(binaryDictionary, "d", true /* isValidWord */, "b", "a"); + assertTrue(isValidTrigram(binaryDictionary, "a", "b", "d")); + assertTrue(isValidBigram(binaryDictionary, "b", "d")); + + onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "b", "a"); + assertTrue(isValidTrigram(binaryDictionary, "a", "b", "cd")); } public void testDecayingProbability() { @@ -301,6 +335,31 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { forcePassingLongTime(binaryDictionary); assertFalse(isValidBigram(binaryDictionary, "a", "b")); + if (!supportsNgram(formatVersion)) { + return; + } + + onInputWord(binaryDictionary, "ab", true /* isValidWord */); + onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab"); + onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab"); + assertTrue(isValidTrigram(binaryDictionary, "ab", "bc", "cd")); + forcePassingShortTime(binaryDictionary); + assertFalse(isValidTrigram(binaryDictionary, "ab", "bc", "cd")); + + onInputWord(binaryDictionary, "ab", true /* isValidWord */); + onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab"); + onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab"); + onInputWord(binaryDictionary, "ab", true /* isValidWord */); + onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab"); + onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab"); + onInputWord(binaryDictionary, "ab", true /* isValidWord */); + onInputWordWithPrevWord(binaryDictionary, "bc", true /* isValidWord */, "ab"); + onInputWordWithPrevWords(binaryDictionary, "cd", true /* isValidWord */, "bc", "ab"); + forcePassingShortTime(binaryDictionary); + assertTrue(isValidTrigram(binaryDictionary, "ab", "bc", "cd")); + forcePassingLongTime(binaryDictionary); + assertFalse(isValidTrigram(binaryDictionary, "ab", "bc", "cd")); + binaryDictionary.close(); } |