diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/ExpandableDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/ExpandableDictionary.java | 71 |
1 files changed, 45 insertions, 26 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index 0318175f6..97a4a1816 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -19,6 +19,8 @@ package com.android.inputmethod.latin; import android.content.Context; import android.os.AsyncTask; +import com.android.inputmethod.keyboard.Keyboard; + import java.util.LinkedList; /** @@ -32,14 +34,14 @@ public class ExpandableDictionary extends Dictionary { */ protected static final int MAX_WORD_LENGTH = 32; + // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8. + protected static final int BIGRAM_MAX_FREQUENCY = 255; + private Context mContext; private char[] mWordBuilder = new char[MAX_WORD_LENGTH]; private int mDicTypeId; private int mMaxDepth; private int mInputLength; - private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH); - - private static final char QUOTE = '\''; private boolean mRequiresReload; @@ -98,6 +100,7 @@ public class ExpandableDictionary extends Dictionary { public int addFrequency(int add) { mFrequency += add; + if (mFrequency > BIGRAM_MAX_FREQUENCY) mFrequency = BIGRAM_MAX_FREQUENCY; return mFrequency; } } @@ -226,6 +229,7 @@ public class ExpandableDictionary extends Dictionary { * Returns the word's frequency or -1 if not found */ protected int getWordFrequency(CharSequence word) { + // Case-sensitive search Node node = searchNode(mRoots, word, 0, word.length()); return (node == null) ? -1 : node.mFrequency; } @@ -301,7 +305,8 @@ public class ExpandableDictionary extends Dictionary { getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, skipPos, callback); } - } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) { + } else if ((c == Keyboard.CODE_SINGLE_QUOTE + && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) { // Skip the ' and continue deeper word[depth] = c; if (children != null) { @@ -327,7 +332,7 @@ public class ExpandableDictionary extends Dictionary { final int finalFreq; if (skipPos < 0) { finalFreq = freq * snr * addedAttenuation - * FULL_WORD_FREQ_MULTIPLIER; + * FULL_WORD_SCORE_MULTIPLIER; } else { finalFreq = computeSkippedWordFinalFreq(freq, snr * addedAttenuation, mInputLength); @@ -362,12 +367,16 @@ public class ExpandableDictionary extends Dictionary { /** * Adds bigrams to the in-memory trie structure that is being used to retrieve any word - * @param frequency frequency for this bigrams - * @param addFrequency if true, it adds to current frequency + * @param frequency frequency for this bigram + * @param addFrequency if true, it adds to current frequency, else it overwrites the old value * @return returns the final frequency */ private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) { - Node firstWord = searchWord(mRoots, word1, 0, null); + // We don't want results to be different according to case of the looked up left hand side + // word. We do want however to return the correct case for the right hand side. + // So we want to squash the case of the left hand side, and preserve that of the right + // hand side word. + Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); Node secondWord = searchWord(mRoots, word2, 0, null); LinkedList<NextWord> bigram = firstWord.mNGrams; if (bigram == null || bigram.size() == 0) { @@ -433,8 +442,12 @@ public class ExpandableDictionary extends Dictionary { } } - private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) { - Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length()); + private void runBigramReverseLookUp(final CharSequence previousWord, + final WordCallback callback) { + // Search for the lowercase version of the word only, because that's where bigrams + // store their sons. + Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0, + previousWord.length()); if (prevWord != null && prevWord.mNGrams != null) { reverseLookUp(prevWord.mNGrams, callback); } @@ -444,7 +457,7 @@ public class ExpandableDictionary extends Dictionary { public void getBigrams(final WordComposer codes, final CharSequence previousWord, final WordCallback callback) { if (!reloadDictionaryIfRequired()) { - runReverseLookUp(previousWord, callback); + runBigramReverseLookUp(previousWord, callback); } } @@ -462,6 +475,9 @@ public class ExpandableDictionary extends Dictionary { } } + // Local to reverseLookUp, but do not allocate each time. + private final char[] mLookedUpString = new char[MAX_WORD_LENGTH]; + /** * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words * through callback. @@ -474,30 +490,33 @@ public class ExpandableDictionary extends Dictionary { for (NextWord nextWord : terminalNodes) { node = nextWord.mWord; freq = nextWord.getFrequency(); - // TODO Not the best way to limit suggestion threshold - if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) { - sb.setLength(0); - do { - sb.insert(0, node.mCode); - node = node.mParent; - } while(node != null); - - // TODO better way to feed char array? - callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId, - DataType.BIGRAM); - } + int index = MAX_WORD_LENGTH; + do { + --index; + mLookedUpString[index] = node.mCode; + node = node.mParent; + } while (node != null); + + callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId, + DataType.BIGRAM); } } /** - * Search for the terminal node of the word + * Recursively search for the terminal node of the word. + * + * One iteration takes the full word to search for and the current index of the recursion. + * + * @param children the node of the trie to search under. + * @param word the word to search for. Only read [offset..length] so there may be trailing chars + * @param offset the index in {@code word} this recursion should operate on. + * @param length the length of the input word. * @return Returns the terminal node of the word if the word exists */ private Node searchNode(final NodeArray children, final CharSequence word, final int offset, final int length) { - // TODO Consider combining with addWordRec final int count = children.mLength; - char currentChar = word.charAt(offset); + final char currentChar = word.charAt(offset); for (int j = 0; j < count; j++) { final Node node = children.mData[j]; if (node.mCode == currentChar) { |