diff options
23 files changed, 179 insertions, 38 deletions
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz Binary files differindex 839f3efca..4f008ed8f 100644 --- a/dictionaries/en_GB_wordlist.combined.gz +++ b/dictionaries/en_GB_wordlist.combined.gz diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java index 4a0ce3735..463d09344 100644 --- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java @@ -41,8 +41,17 @@ abstract public class AbstractDictionaryWriter extends Dictionary { abstract public void clear(); + /** + * Add a unigram with an optional shortcut to the dictionary. + * @param word The word to add. + * @param shortcutTarget A shortcut target for this word, or null if none. + * @param frequency The frequency for this unigram. + * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored + * if shortcutTarget is null. + * @param isNotAWord true if this is not a word, i.e. shortcut only. + */ abstract public void addUnigramWord(final String word, final String shortcutTarget, - final int frequency, final boolean isNotAWord); + final int frequency, final int shortcutFreq, final boolean isNotAWord); // TODO: Remove lastModifiedTime after making binary dictionary support forgetting curve. abstract public void addBigramWords(final String word0, final String word1, diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 541e69788..fd296988e 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -52,6 +52,10 @@ public final class BinaryDictionary extends Dictionary { public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; @UsedForTesting public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; + @UsedForTesting + public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; + @UsedForTesting + public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; private long mNativeDict; private final Locale mLocale; diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index ffeb92784..47891c6b7 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -127,7 +127,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { if (DEBUG) { Log.d(TAG, "loadAccountVocabulary: " + word); } - super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, + super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, 0 /* shortcutFreq */, false /* isNotAWord */); } } @@ -213,7 +213,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord); } super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, - false /* isNotAWord */); + 0 /* shortcutFreq */, false /* isNotAWord */); if (!TextUtils.isEmpty(prevWord)) { if (mUseFirstLastBigrams) { super.addBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM, diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java index 84abfa66d..3df2a2b63 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java @@ -62,13 +62,13 @@ public class DictionaryWriter extends AbstractDictionaryWriter { // considering performance regression. @Override public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, - final boolean isNotAWord) { + final int shortcutFreq, final boolean isNotAWord) { if (shortcutTarget == null) { mFusionDictionary.add(word, frequency, null, isNotAWord); } else { // TODO: Do this in the subclass, with this class taking an arraylist. final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList(); - shortcutTargets.add(new WeightedString(shortcutTarget, frequency)); + shortcutTargets.add(new WeightedString(shortcutTarget, shortcutFreq)); mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord); } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index c79a4ff90..eb8650e6f 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -261,10 +261,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { /** * Adds a word unigram to the dictionary. Used for loading a dictionary. + * @param word The word to add. + * @param shortcutTarget A shortcut target for this word, or null if none. + * @param frequency The frequency for this unigram. + * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored + * if shortcutTarget is null. + * @param isNotAWord true if this is not a word, i.e. shortcut only. */ protected void addWord(final String word, final String shortcutTarget, - final int frequency, final boolean isNotAWord) { - mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord); + final int frequency, final int shortcutFreq, final boolean isNotAWord) { + mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, isNotAWord); } /** @@ -313,7 +319,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * Dynamically adds a word unigram to the dictionary. May overwrite an existing entry. */ protected void addWordDynamically(final String word, final String shortcutTarget, - final int frequency, final boolean isNotAWord) { + final int frequency, final int shortcutFreq, final boolean isNotAWord) { if (!mIsUpdatable) { Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename); return; @@ -326,7 +332,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { mBinaryDictionary.addUnigramWord(word, frequency); } else { // TODO: Remove. - mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord); + mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, + isNotAWord); } } }); diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index d491f988a..95c9bcab9 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -156,15 +156,36 @@ public class ExpandableDictionary extends Dictionary { return Constants.DICTIONARY_MAX_WORD_LENGTH; } - public void addWord(final String word, final String shortcutTarget, final int frequency) { + /** + * Add a word with an optional shortcut to the dictionary. + * @param word The word to add. + * @param shortcutTarget A shortcut target for this word, or null if none. + * @param frequency The frequency for this unigram. + * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored + * if shortcutTarget is null. + */ + public void addWord(final String word, final String shortcutTarget, final int frequency, + final int shortcutFreq) { if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) { return; } - addWordRec(mRoots, word, 0, shortcutTarget, frequency, null); + addWordRec(mRoots, word, 0, shortcutTarget, frequency, shortcutFreq, null); } + /** + * Add a word, recursively searching for its correct place in the trie tree. + * @param children The node to recursively search for addition. Initially, the root of the tree. + * @param word The word to add. + * @param depth The current depth in the tree. + * @param shortcutTarget A shortcut target for this word, or null if none. + * @param frequency The frequency for this unigram. + * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored + * if shortcutTarget is null. + * @param parentNode The parent node, for up linking. Initially null, as the root has no parent. + */ private void addWordRec(final NodeArray children, final String word, final int depth, - final String shortcutTarget, final int frequency, final Node parentNode) { + final String shortcutTarget, final int frequency, final int shortcutFreq, + final Node parentNode) { final int wordLength = word.length(); if (wordLength <= depth) return; final char c = word.charAt(depth); @@ -204,7 +225,8 @@ public class ExpandableDictionary extends Dictionary { if (childNode.mChildren == null) { childNode.mChildren = new NodeArray(); } - addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode); + addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, shortcutFreq, + childNode); } @Override diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 9fd1f53a2..c270d47d0 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -286,14 +286,16 @@ public final class Suggest { // the word *would* have been auto-corrected. if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord() || suggestionsSet.isEmpty() || wordComposer.hasDigits() - || wordComposer.isMostlyCaps() || wordComposer.isResumed() - || !hasMainDictionary()) { + || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary() + || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) { // If we don't have a main dictionary, we never want to auto-correct. The reason for // this is, the user may have a contact whose name happens to match a valid word in // their language, and it will unexpectedly auto-correct. For example, if the user // types in English with no dictionary and has a "Will" in their contact list, "will" // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no // auto-correct. + // Also, shortcuts should never auto-correct unless they are whitelist entries. + // TODO: we may want to have shortcut-only entries auto-correct in the future. hasAutoCorrection = false; } else { hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold( diff --git a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java index 864a17375..15b3d8d02 100644 --- a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java @@ -47,6 +47,9 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { private static final String USER_DICTIONARY_ALL_LANGUAGES = ""; private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250; private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160; + // Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries + // to auto-correct, so we set this to the highest frequency that won't, i.e. 14. + private static final int USER_DICT_SHORTCUT_FREQUENCY = 14; // TODO: use Words.SHORTCUT when we target JellyBean or above final static String SHORTCUT = "shortcut"; @@ -243,10 +246,12 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary { final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency); // Safeguard against adding really long words. if (word.length() < MAX_WORD_LENGTH) { - super.addWord(word, null, adjustedFrequency, false /* isNotAWord */); + super.addWord(word, null, adjustedFrequency, 0 /* shortcutFreq */, + false /* isNotAWord */); } if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) { - super.addWord(shortcut, word, adjustedFrequency, true /* isNotAWord */); + super.addWord(shortcut, word, adjustedFrequency, USER_DICT_SHORTCUT_FREQUENCY, + true /* isNotAWord */); } cursor.moveToNext(); } diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index c8b62b6c8..a1e36006b 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -138,7 +138,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ? (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) : FREQUENCY_FOR_TYPED; - addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency, + addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */, false /* isNotAWord */); // Do not insert a word as a bigram of itself if (word1.equals(word0)) { @@ -171,11 +171,11 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB final OnAddWordListener listener = new OnAddWordListener() { @Override public void setUnigram(final String word, final String shortcutTarget, - final int frequency) { + final int frequency, final int shortcutFreq) { if (DBG_SAVE_RESTORE) { Log.d(TAG, "load unigram: " + word + "," + frequency); } - addWord(word, shortcutTarget, frequency, false /* isNotAWord */); + addWord(word, shortcutTarget, frequency, shortcutFreq, false /* isNotAWord */); ++profTotalCount[0]; } diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java index 039b25337..6f152bb91 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java @@ -75,15 +75,21 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr /** * Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes * are done to update the binary dictionary. + * @param word The word to add. + * @param shortcutTarget A shortcut target for this word, or null if none. + * @param frequency The frequency for this unigram. + * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored + * if shortcutTarget is null. + * @param isNotAWord true if this is not a word, i.e. shortcut only. */ @Override public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, - final boolean isNotAWord) { + final int shortcutFreq, final boolean isNotAWord) { if (mBigramList.size() > mMaxHistoryBigrams * 2) { // Too many entries: just stop adding new vocabulary and wait next refresh. return; } - mExpandableDictionary.addWord(word, shortcutTarget, frequency); + mExpandableDictionary.addWord(word, shortcutTarget, frequency, shortcutFreq); mBigramList.addBigram(null, word, (byte)frequency); } diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index ea32a74ff..635afe7cc 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -49,7 +49,16 @@ public final class UserHistoryDictIOUtils { private static final String LAST_UPDATED_TIME_KEY = "date"; public interface OnAddWordListener { - public void setUnigram(final String word, final String shortcutTarget, final int frequency); + /** + * Callback to be notified when a word is added to the dictionary. + * @param word The added word. + * @param shortcutTarget A shortcut target for this word, or null if none. + * @param frequency The frequency for this word. + * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). + * Unspecified if shortcutTarget is null - do not rely on its value. + */ + public void setUnigram(final String word, final String shortcutTarget, final int frequency, + final int shortcutFreq); public void setBigram(final String word1, final String word2, final int frequency); } @@ -153,7 +162,7 @@ public final class UserHistoryDictIOUtils { for (Entry<Integer, String> entry : unigrams.entrySet()) { final String word1 = entry.getValue(); final int unigramFrequency = frequencies.get(entry.getKey()); - to.setUnigram(word1, null, unigramFrequency); + to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */); final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey()); if (attrList != null) { for (final PendingAttribute attr : attrList) { diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h index 461d7b454..9ccef020f 100644 --- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h +++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h @@ -44,7 +44,7 @@ class ShortcutUtils { shortcutScore = finalScore; // Protection against int underflow shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1; - kind = Dictionary::KIND_CORRECTION; + kind = Dictionary::KIND_SHORTCUT; } outputTypes[outputWordIndex] = kind; frequencies[outputWordIndex] = shortcutScore; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index a17a0acf6..5724c5d88 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -39,7 +39,7 @@ bool DynamicPatriciaTrieGcEventListeners return false; } if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { - isUselessPtNode = false; + isUselessPtNode = true; } } if (mChildrenValue > 0) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index 3ca2f2a01..9755120b0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -60,6 +60,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onDescend(const int ptNodeArrayPos) { mValueStack.push_back(0); + mChildrenValue = 0; return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 31e3fb42f..3d07c9d6c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -37,6 +37,8 @@ namespace latinime { // BinaryDictionaryDecayingTests. const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY = "SET_NEEDS_TO_DECAY_FOR_TESTING"; const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; @@ -355,6 +357,14 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const snprintf(outResult, maxResultLength, "%d", mUnigramCount); } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { snprintf(outResult, maxResultLength, "%d", mBigramCount); + } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", + mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT : + DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE); + } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", + mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT : + DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE); } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) { mNeedsToDecayForTesting = true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 903f65e8e..be97ee1a5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -102,6 +102,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { static const char *const UNIGRAM_COUNT_QUERY; static const char *const BIGRAM_COUNT_QUERY; + static const char *const MAX_UNIGRAM_COUNT_QUERY; + static const char *const MAX_BIGRAM_COUNT_QUERY; static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY; static const int MAX_DICT_EXTENDED_REGION_SIZE; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index 601ee663b..f108c219f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -93,6 +93,12 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) { return false; } + if (isEnd()) { + // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array. + if (!listener->onReadingPtNodeArrayTail()) { + return false; + } + } pushReadingStateToStack(); while (!isEnd()) { if (alreadyVisitedAllPtNodesInArray) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index 512a4d818..a71c06971 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -279,7 +279,9 @@ class DynamicPatriciaTrieReadingHelper { } else { mReadingState = mReadingStateStack.back(); mReadingStateStack.pop_back(); - fetchPtNodeInfo(); + if (!isEnd()) { + fetchPtNodeInfo(); + } } } }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index 19ca35481..1632fd072 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -93,8 +93,7 @@ void ForgettingCurveUtils::TimeKeeper::setCurrentTime() { for (int i = 0; i < decayIterationCount; ++i) { const float currentRate = static_cast<float>(currentEncodedProbability) / static_cast<float>(MAX_ENCODED_PROBABILITY); - const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY - + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate; + const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate; const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); if (thresholdToDecay < randValue) { currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index ded8eaa97..cecdd2ffb 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -19,13 +19,16 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; +import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.FormatSpec; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.Locale; import java.util.Map; +import java.util.Random; @LargeTest public class BinaryDictionaryDecayingTests extends AndroidTestCase { @@ -179,4 +182,55 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { binaryDictionary.close(); dictFile.delete(); } + + public void testAddManyUnigramsToDecayingDict() { + final int unigramCount = 30000; + final int unigramTypedCount = 100000; + final int codePointSetSize = 50; + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final ArrayList<String> words = new ArrayList<String>(); + + for (int i = 0; i < unigramCount; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + } + + final int maxUnigramCount = Integer.parseInt( + binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); + for (int i = 0; i < unigramTypedCount; i++) { + final String word = words.get(random.nextInt(words.size())); + binaryDictionary.addUnigramWord(word, DUMMY_PROBABILITY); + + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + final int unigramCountBeforeGC = + Integer.parseInt(binaryDictionary.getPropertyForTests( + BinaryDictionary.UNIGRAM_COUNT_QUERY)); + while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + final int unigramCountAfterGC = + Integer.parseInt(binaryDictionary.getPropertyForTests( + BinaryDictionary.UNIGRAM_COUNT_QUERY)); + assertTrue(unigramCountBeforeGC > unigramCountAfterGC); + } + } + + assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( + BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0); + assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( + BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount); + } } diff --git a/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java b/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java index ecf3af736..6aae1044e 100644 --- a/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java @@ -26,13 +26,16 @@ import android.test.suitebuilder.annotation.SmallTest; public class ExpandableDictionaryTests extends AndroidTestCase { private final static int UNIGRAM_FREQ = 50; + // See UserBinaryDictionary for more information about this variable. + // For tests, its actual value does not matter. + private final static int SHORTCUT_FREQ = 14; public void testAddWordAndGetWordFrequency() { final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER); // Add words - dict.addWord("abcde", "abcde", UNIGRAM_FREQ); - dict.addWord("abcef", null, UNIGRAM_FREQ + 1); + dict.addWord("abcde", "abcde", UNIGRAM_FREQ, SHORTCUT_FREQ); + dict.addWord("abcef", null, UNIGRAM_FREQ + 1, 0); // Check words assertFalse(dict.isValidWord("abcde")); @@ -40,16 +43,16 @@ public class ExpandableDictionaryTests extends AndroidTestCase { assertTrue(dict.isValidWord("abcef")); assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef")); - dict.addWord("abc", null, UNIGRAM_FREQ + 2); + dict.addWord("abc", null, UNIGRAM_FREQ + 2, 0); assertTrue(dict.isValidWord("abc")); assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc")); // Add existing word with lower frequency - dict.addWord("abc", null, UNIGRAM_FREQ); + dict.addWord("abc", null, UNIGRAM_FREQ, 0); assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc")); // Add existing word with higher frequency - dict.addWord("abc", null, UNIGRAM_FREQ + 3); + dict.addWord("abc", null, UNIGRAM_FREQ + 3, 0); assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc")); } } diff --git a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java index 3eabe2b3c..1944fd332 100644 --- a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java @@ -196,8 +196,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList(); final OnAddWordListener listener = new OnAddWordListener() { @Override - public void setUnigram(final String word, - final String shortcutTarget, final int frequency) { + public void setUnigram(final String word, final String shortcutTarget, + final int frequency, final int shortcutFreq) { Log.d(TAG, "in: setUnigram: " + word + "," + frequency); resultList.addBigram(null, word, (byte)frequency); } @@ -220,8 +220,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList(); final OnAddWordListener listener2 = new OnAddWordListener() { @Override - public void setUnigram(final String word, - final String shortcutTarget, final int frequency) { + public void setUnigram(final String word, final String shortcutTarget, + final int frequency, final int shortcutFreq) { Log.d(TAG, "in: setUnigram: " + word + "," + frequency); resultList2.addBigram(null, word, (byte)frequency); } |