about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- dictionaries/en_GB_wordlist.combined.gz bin 859935 -> 859952 bytes
-rw-r--r-- java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java 11
-rw-r--r-- java/src/com/android/inputmethod/latin/BinaryDictionary.java 4
-rw-r--r-- java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java 4
-rw-r--r-- java/src/com/android/inputmethod/latin/DictionaryWriter.java 4
-rw-r--r-- java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java 15
-rw-r--r-- java/src/com/android/inputmethod/latin/ExpandableDictionary.java 30
-rw-r--r-- java/src/com/android/inputmethod/latin/Suggest.java 6
-rw-r--r-- java/src/com/android/inputmethod/latin/UserBinaryDictionary.java 9
-rw-r--r-- java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java 6
-rw-r--r-- java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java 10
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java 13
-rw-r--r-- native/jni/src/suggest/core/dictionary/shortcut_utils.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h 1
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 3
-rw-r--r-- tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java 54
-rw-r--r-- tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java 13
-rw-r--r-- tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java 8
23 files changed, 179 insertions, 38 deletions
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz
index 839f3efca..4f008ed8f 100644
--- a/dictionaries/en_GB_wordlist.combined.gz
+++ b/dictionaries/en_GB_wordlist.combined.gz
Binary files differ
diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
index 4a0ce3735..463d09344 100644
--- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
@@ -41,8 +41,17 @@ abstract public class AbstractDictionaryWriter extends Dictionary {
abstract public void clear();
+ /**
+ * Add a unigram with an optional shortcut to the dictionary.
+ * @param word The word to add.
+ * @param shortcutTarget A shortcut target for this word, or null if none.
+ * @param frequency The frequency for this unigram.
+ * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
+ * if shortcutTarget is null.
+ * @param isNotAWord true if this is not a word, i.e. shortcut only.
+ */
abstract public void addUnigramWord(final String word, final String shortcutTarget,
- final int frequency, final boolean isNotAWord);
+ final int frequency, final int shortcutFreq, final boolean isNotAWord);
// TODO: Remove lastModifiedTime after making binary dictionary support forgetting curve.
abstract public void addBigramWords(final String word0, final String word1,
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 541e69788..fd296988e 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -52,6 +52,10 @@ public final class BinaryDictionary extends Dictionary {
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
@UsedForTesting
public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+ @UsedForTesting
+ public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+ @UsedForTesting
+ public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
private long mNativeDict;
private final Locale mLocale;
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
index ffeb92784..47891c6b7 100644
--- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
@@ -127,7 +127,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
if (DEBUG) {
Log.d(TAG, "loadAccountVocabulary: " + word);
}
- super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
+ super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS, 0 /* shortcutFreq */,
false /* isNotAWord */);
}
}
@@ -213,7 +213,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord);
}
super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
- false /* isNotAWord */);
+ 0 /* shortcutFreq */, false /* isNotAWord */);
if (!TextUtils.isEmpty(prevWord)) {
if (mUseFirstLastBigrams) {
super.addBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM,
diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java
index 84abfa66d..3df2a2b63 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java
@@ -62,13 +62,13 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
// considering performance regression.
@Override
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
- final boolean isNotAWord) {
+ final int shortcutFreq, final boolean isNotAWord) {
if (shortcutTarget == null) {
mFusionDictionary.add(word, frequency, null, isNotAWord);
} else {
// TODO: Do this in the subclass, with this class taking an arraylist.
final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
- shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
+ shortcutTargets.add(new WeightedString(shortcutTarget, shortcutFreq));
mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord);
}
}
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index c79a4ff90..eb8650e6f 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -261,10 +261,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
/**
* Adds a word unigram to the dictionary. Used for loading a dictionary.
+ * @param word The word to add.
+ * @param shortcutTarget A shortcut target for this word, or null if none.
+ * @param frequency The frequency for this unigram.
+ * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
+ * if shortcutTarget is null.
+ * @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
protected void addWord(final String word, final String shortcutTarget,
- final int frequency, final boolean isNotAWord) {
- mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord);
+ final int frequency, final int shortcutFreq, final boolean isNotAWord) {
+ mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, isNotAWord);
}
/**
@@ -313,7 +319,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* Dynamically adds a word unigram to the dictionary. May overwrite an existing entry.
*/
protected void addWordDynamically(final String word, final String shortcutTarget,
- final int frequency, final boolean isNotAWord) {
+ final int frequency, final int shortcutFreq, final boolean isNotAWord) {
if (!mIsUpdatable) {
Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename);
return;
@@ -326,7 +332,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
mBinaryDictionary.addUnigramWord(word, frequency);
} else {
// TODO: Remove.
- mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, isNotAWord);
+ mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq,
+ isNotAWord);
}
}
});
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index d491f988a..95c9bcab9 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -156,15 +156,36 @@ public class ExpandableDictionary extends Dictionary {
return Constants.DICTIONARY_MAX_WORD_LENGTH;
}
- public void addWord(final String word, final String shortcutTarget, final int frequency) {
+ /**
+ * Add a word with an optional shortcut to the dictionary.
+ * @param word The word to add.
+ * @param shortcutTarget A shortcut target for this word, or null if none.
+ * @param frequency The frequency for this unigram.
+ * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
+ * if shortcutTarget is null.
+ */
+ public void addWord(final String word, final String shortcutTarget, final int frequency,
+ final int shortcutFreq) {
if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
return;
}
- addWordRec(mRoots, word, 0, shortcutTarget, frequency, null);
+ addWordRec(mRoots, word, 0, shortcutTarget, frequency, shortcutFreq, null);
}
+ /**
+ * Add a word, recursively searching for its correct place in the trie tree.
+ * @param children The node to recursively search for addition. Initially, the root of the tree.
+ * @param word The word to add.
+ * @param depth The current depth in the tree.
+ * @param shortcutTarget A shortcut target for this word, or null if none.
+ * @param frequency The frequency for this unigram.
+ * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
+ * if shortcutTarget is null.
+ * @param parentNode The parent node, for up linking. Initially null, as the root has no parent.
+ */
private void addWordRec(final NodeArray children, final String word, final int depth,
- final String shortcutTarget, final int frequency, final Node parentNode) {
+ final String shortcutTarget, final int frequency, final int shortcutFreq,
+ final Node parentNode) {
final int wordLength = word.length();
if (wordLength <= depth) return;
final char c = word.charAt(depth);
@@ -204,7 +225,8 @@ public class ExpandableDictionary extends Dictionary {
if (childNode.mChildren == null) {
childNode.mChildren = new NodeArray();
}
- addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode);
+ addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, shortcutFreq,
+ childNode);
}
@Override
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index 9fd1f53a2..c270d47d0 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -286,14 +286,16 @@ public final class Suggest {
// the word *would* have been auto-corrected.
if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
|| suggestionsSet.isEmpty() || wordComposer.hasDigits()
- || wordComposer.isMostlyCaps() || wordComposer.isResumed()
- || !hasMainDictionary()) {
+ || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary()
+ || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
// If we don't have a main dictionary, we never want to auto-correct. The reason for
// this is, the user may have a contact whose name happens to match a valid word in
// their language, and it will unexpectedly auto-correct. For example, if the user
// types in English with no dictionary and has a "Will" in their contact list, "will"
// would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
// auto-correct.
+ // Also, shortcuts should never auto-correct unless they are whitelist entries.
+ // TODO: we may want to have shortcut-only entries auto-correct in the future.
hasAutoCorrection = false;
} else {
hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
diff --git a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java
index 864a17375..15b3d8d02 100644
--- a/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/UserBinaryDictionary.java
@@ -47,6 +47,9 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
private static final String USER_DICTIONARY_ALL_LANGUAGES = "";
private static final int HISTORICAL_DEFAULT_USER_DICTIONARY_FREQUENCY = 250;
private static final int LATINIME_DEFAULT_USER_DICTIONARY_FREQUENCY = 160;
+ // Shortcut frequency is 0~15, with 15 = whitelist. We don't want user dictionary entries
+ // to auto-correct, so we set this to the highest frequency that won't, i.e. 14.
+ private static final int USER_DICT_SHORTCUT_FREQUENCY = 14;
// TODO: use Words.SHORTCUT when we target JellyBean or above
final static String SHORTCUT = "shortcut";
@@ -243,10 +246,12 @@ public class UserBinaryDictionary extends ExpandableBinaryDictionary {
final int adjustedFrequency = scaleFrequencyFromDefaultToLatinIme(frequency);
// Safeguard against adding really long words.
if (word.length() < MAX_WORD_LENGTH) {
- super.addWord(word, null, adjustedFrequency, false /* isNotAWord */);
+ super.addWord(word, null, adjustedFrequency, 0 /* shortcutFreq */,
+ false /* isNotAWord */);
}
if (null != shortcut && shortcut.length() < MAX_WORD_LENGTH) {
- super.addWord(shortcut, word, adjustedFrequency, true /* isNotAWord */);
+ super.addWord(shortcut, word, adjustedFrequency, USER_DICT_SHORTCUT_FREQUENCY,
+ true /* isNotAWord */);
}
cursor.moveToNext();
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
index c8b62b6c8..a1e36006b 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
@@ -138,7 +138,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
(isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
FREQUENCY_FOR_TYPED;
- addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
+ addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */,
false /* isNotAWord */);
// Do not insert a word as a bigram of itself
if (word1.equals(word0)) {
@@ -171,11 +171,11 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
final OnAddWordListener listener = new OnAddWordListener() {
@Override
public void setUnigram(final String word, final String shortcutTarget,
- final int frequency) {
+ final int frequency, final int shortcutFreq) {
if (DBG_SAVE_RESTORE) {
Log.d(TAG, "load unigram: " + word + "," + frequency);
}
- addWord(word, shortcutTarget, frequency, false /* isNotAWord */);
+ addWord(word, shortcutTarget, frequency, shortcutFreq, false /* isNotAWord */);
++profTotalCount[0];
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java
index 039b25337..6f152bb91 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java
@@ -75,15 +75,21 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr
/**
* Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes
* are done to update the binary dictionary.
+ * @param word The word to add.
+ * @param shortcutTarget A shortcut target for this word, or null if none.
+ * @param frequency The frequency for this unigram.
+ * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored
+ * if shortcutTarget is null.
+ * @param isNotAWord true if this is not a word, i.e. shortcut only.
*/
@Override
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
- final boolean isNotAWord) {
+ final int shortcutFreq, final boolean isNotAWord) {
if (mBigramList.size() > mMaxHistoryBigrams * 2) {
// Too many entries: just stop adding new vocabulary and wait next refresh.
return;
}
- mExpandableDictionary.addWord(word, shortcutTarget, frequency);
+ mExpandableDictionary.addWord(word, shortcutTarget, frequency, shortcutFreq);
mBigramList.addBigram(null, word, (byte)frequency);
}
diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
index ea32a74ff..635afe7cc 100644
--- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
@@ -49,7 +49,16 @@ public final class UserHistoryDictIOUtils {
private static final String LAST_UPDATED_TIME_KEY = "date";
public interface OnAddWordListener {
- public void setUnigram(final String word, final String shortcutTarget, final int frequency);
+ /**
+ * Callback to be notified when a word is added to the dictionary.
+ * @param word The added word.
+ * @param shortcutTarget A shortcut target for this word, or null if none.
+ * @param frequency The frequency for this word.
+ * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
+ * Unspecified if shortcutTarget is null - do not rely on its value.
+ */
+ public void setUnigram(final String word, final String shortcutTarget, final int frequency,
+ final int shortcutFreq);
public void setBigram(final String word1, final String word2, final int frequency);
}
@@ -153,7 +162,7 @@ public final class UserHistoryDictIOUtils {
for (Entry<Integer, String> entry : unigrams.entrySet()) {
final String word1 = entry.getValue();
final int unigramFrequency = frequencies.get(entry.getKey());
- to.setUnigram(word1, null, unigramFrequency);
+ to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
if (attrList != null) {
for (final PendingAttribute attr : attrList) {
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 461d7b454..9ccef020f 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -44,7 +44,7 @@ class ShortcutUtils {
shortcutScore = finalScore;
// Protection against int underflow
shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1;
- kind = Dictionary::KIND_CORRECTION;
+ kind = Dictionary::KIND_SHORTCUT;
}
outputTypes[outputWordIndex] = kind;
frequencies[outputWordIndex] = shortcutScore;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index a17a0acf6..5724c5d88 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -39,7 +39,7 @@ bool DynamicPatriciaTrieGcEventListeners
return false;
}
if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
- isUselessPtNode = false;
+ isUselessPtNode = true;
}
}
if (mChildrenValue > 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 3ca2f2a01..9755120b0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -60,6 +60,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onDescend(const int ptNodeArrayPos) {
mValueStack.push_back(0);
+ mChildrenValue = 0;
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 31e3fb42f..3d07c9d6c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -37,6 +37,8 @@ namespace latinime {
// BinaryDictionaryDecayingTests.
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
@@ -355,6 +357,14 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
snprintf(outResult, maxResultLength, "%d", mBigramCount);
+ } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d",
+ mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
+ DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE);
+ } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d",
+ mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
+ DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE);
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
mNeedsToDecayForTesting = true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 903f65e8e..be97ee1a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -102,6 +102,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const UNIGRAM_COUNT_QUERY;
static const char *const BIGRAM_COUNT_QUERY;
+ static const char *const MAX_UNIGRAM_COUNT_QUERY;
+ static const char *const MAX_BIGRAM_COUNT_QUERY;
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index 601ee663b..f108c219f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -93,6 +93,12 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
return false;
}
+ if (isEnd()) {
+ // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
+ if (!listener->onReadingPtNodeArrayTail()) {
+ return false;
+ }
+ }
pushReadingStateToStack();
while (!isEnd()) {
if (alreadyVisitedAllPtNodesInArray) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index 512a4d818..a71c06971 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -279,7 +279,9 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
- fetchPtNodeInfo();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
}
}
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 19ca35481..1632fd072 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -93,8 +93,7 @@ void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
for (int i = 0; i < decayIterationCount; ++i) {
const float currentRate = static_cast<float>(currentEncodedProbability)
/ static_cast<float>(MAX_ENCODED_PROBABILITY);
- const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
- + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
+ const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
if (thresholdToDecay < randValue) {
currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index ded8eaa97..cecdd2ffb 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -19,13 +19,16 @@ package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
+import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec;
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+import java.util.Random;
@LargeTest
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
@@ -179,4 +182,55 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.close();
dictFile.delete();
}
+
+ public void testAddManyUnigramsToDecayingDict() {
+ final int unigramCount = 30000;
+ final int unigramTypedCount = 100000;
+ final int codePointSetSize = 50;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+ final ArrayList<String> words = new ArrayList<String>();
+
+ for (int i = 0; i < unigramCount; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ words.add(word);
+ }
+
+ final int maxUnigramCount = Integer.parseInt(
+ binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
+ for (int i = 0; i < unigramTypedCount; i++) {
+ final String word = words.get(random.nextInt(words.size()));
+ binaryDictionary.addUnigramWord(word, DUMMY_PROBABILITY);
+
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ final int unigramCountBeforeGC =
+ Integer.parseInt(binaryDictionary.getPropertyForTests(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY));
+ while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ final int unigramCountAfterGC =
+ Integer.parseInt(binaryDictionary.getPropertyForTests(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY));
+ assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
+ }
+ }
+
+ assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
+ assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
+ }
}
diff --git a/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java b/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java
index ecf3af736..6aae1044e 100644
--- a/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java
@@ -26,13 +26,16 @@ import android.test.suitebuilder.annotation.SmallTest;
public class ExpandableDictionaryTests extends AndroidTestCase {
private final static int UNIGRAM_FREQ = 50;
+ // See UserBinaryDictionary for more information about this variable.
+ // For tests, its actual value does not matter.
+ private final static int SHORTCUT_FREQ = 14;
public void testAddWordAndGetWordFrequency() {
final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER);
// Add words
- dict.addWord("abcde", "abcde", UNIGRAM_FREQ);
- dict.addWord("abcef", null, UNIGRAM_FREQ + 1);
+ dict.addWord("abcde", "abcde", UNIGRAM_FREQ, SHORTCUT_FREQ);
+ dict.addWord("abcef", null, UNIGRAM_FREQ + 1, 0);
// Check words
assertFalse(dict.isValidWord("abcde"));
@@ -40,16 +43,16 @@ public class ExpandableDictionaryTests extends AndroidTestCase {
assertTrue(dict.isValidWord("abcef"));
assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef"));
- dict.addWord("abc", null, UNIGRAM_FREQ + 2);
+ dict.addWord("abc", null, UNIGRAM_FREQ + 2, 0);
assertTrue(dict.isValidWord("abc"));
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
// Add existing word with lower frequency
- dict.addWord("abc", null, UNIGRAM_FREQ);
+ dict.addWord("abc", null, UNIGRAM_FREQ, 0);
assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc"));
// Add existing word with higher frequency
- dict.addWord("abc", null, UNIGRAM_FREQ + 3);
+ dict.addWord("abc", null, UNIGRAM_FREQ + 3, 0);
assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc"));
}
}
diff --git a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java
index 3eabe2b3c..1944fd332 100644
--- a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java
@@ -196,8 +196,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
final OnAddWordListener listener = new OnAddWordListener() {
@Override
- public void setUnigram(final String word,
- final String shortcutTarget, final int frequency) {
+ public void setUnigram(final String word, final String shortcutTarget,
+ final int frequency, final int shortcutFreq) {
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
resultList.addBigram(null, word, (byte)frequency);
}
@@ -220,8 +220,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
final OnAddWordListener listener2 = new OnAddWordListener() {
@Override
- public void setUnigram(final String word,
- final String shortcutTarget, final int frequency) {
+ public void setUnigram(final String word, final String shortcutTarget,
+ final int frequency, final int shortcutFreq) {
Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
resultList2.addBigram(null, word, (byte)frequency);
}