aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/ExpandableDictionary.java')
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableDictionary.java308
1 files changed, 178 insertions, 130 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index e954c0818..97a4a1816 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -16,11 +16,13 @@
package com.android.inputmethod.latin;
-import java.util.LinkedList;
-
import android.content.Context;
import android.os.AsyncTask;
+import com.android.inputmethod.keyboard.Keyboard;
+
+import java.util.LinkedList;
+
/**
* Base class for an in-memory dictionary that can grow dynamically and can
* be searched for suggestions and valid words.
@@ -32,15 +34,14 @@ public class ExpandableDictionary extends Dictionary {
*/
protected static final int MAX_WORD_LENGTH = 32;
+ // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8.
+ protected static final int BIGRAM_MAX_FREQUENCY = 255;
+
private Context mContext;
private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
private int mDicTypeId;
private int mMaxDepth;
private int mInputLength;
- private int[] mNextLettersFrequencies;
- private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
-
- private static final char QUOTE = '\'';
private boolean mRequiresReload;
@@ -49,53 +50,66 @@ public class ExpandableDictionary extends Dictionary {
// Use this lock before touching mUpdatingDictionary & mRequiresDownload
private Object mUpdatingLock = new Object();
- static class Node {
- char code;
- int frequency;
- boolean terminal;
- Node parent;
- NodeArray children;
- LinkedList<NextWord> ngrams; // Supports ngram
+ private static class Node {
+ char mCode;
+ int mFrequency;
+ boolean mTerminal;
+ Node mParent;
+ NodeArray mChildren;
+ LinkedList<NextWord> mNGrams; // Supports ngram
}
- static class NodeArray {
- Node[] data;
- int length = 0;
+ private static class NodeArray {
+ Node[] mData;
+ int mLength = 0;
private static final int INCREMENT = 2;
NodeArray() {
- data = new Node[INCREMENT];
+ mData = new Node[INCREMENT];
}
void add(Node n) {
- if (length + 1 > data.length) {
- Node[] tempData = new Node[length + INCREMENT];
- if (length > 0) {
- System.arraycopy(data, 0, tempData, 0, length);
+ if (mLength + 1 > mData.length) {
+ Node[] tempData = new Node[mLength + INCREMENT];
+ if (mLength > 0) {
+ System.arraycopy(mData, 0, tempData, 0, mLength);
}
- data = tempData;
+ mData = tempData;
}
- data[length++] = n;
+ mData[mLength++] = n;
}
}
- static class NextWord {
- Node word;
- NextWord nextWord;
- int frequency;
+ private static class NextWord {
+ public final Node mWord;
+ private int mFrequency;
- NextWord(Node word, int frequency) {
- this.word = word;
- this.frequency = frequency;
+ public NextWord(Node word, int frequency) {
+ mWord = word;
+ mFrequency = frequency;
+ }
+
+ public int getFrequency() {
+ return mFrequency;
}
- }
+ public int setFrequency(int freq) {
+ mFrequency = freq;
+ return mFrequency;
+ }
+
+ public int addFrequency(int add) {
+ mFrequency += add;
+ if (mFrequency > BIGRAM_MAX_FREQUENCY) mFrequency = BIGRAM_MAX_FREQUENCY;
+ return mFrequency;
+ }
+ }
private NodeArray mRoots;
private int[][] mCodes;
- ExpandableDictionary(Context context, int dicTypeId) {
+ public ExpandableDictionary(Context context, int dicTypeId) {
mContext = context;
clearDictionary();
mCodes = new int[MAX_WORD_LENGTH][];
@@ -128,13 +142,14 @@ public class ExpandableDictionary extends Dictionary {
/** Override to load your dictionary here, on a background thread. */
public void loadDictionaryAsync() {
+ // empty base implementation
}
- Context getContext() {
+ public Context getContext() {
return mContext;
}
-
- int getMaxWordLength() {
+
+ public int getMaxWordLength() {
return MAX_WORD_LENGTH;
}
@@ -145,40 +160,40 @@ public class ExpandableDictionary extends Dictionary {
private void addWordRec(NodeArray children, final String word, final int depth,
final int frequency, Node parentNode) {
final int wordLength = word.length();
+ if (wordLength <= depth) return;
final char c = word.charAt(depth);
// Does children have the current character?
- final int childrenLength = children.length;
+ final int childrenLength = children.mLength;
Node childNode = null;
boolean found = false;
for (int i = 0; i < childrenLength; i++) {
- childNode = children.data[i];
- if (childNode.code == c) {
+ childNode = children.mData[i];
+ if (childNode.mCode == c) {
found = true;
break;
}
}
if (!found) {
childNode = new Node();
- childNode.code = c;
- childNode.parent = parentNode;
+ childNode.mCode = c;
+ childNode.mParent = parentNode;
children.add(childNode);
}
if (wordLength == depth + 1) {
// Terminate this word
- childNode.terminal = true;
- childNode.frequency = Math.max(frequency, childNode.frequency);
- if (childNode.frequency > 255) childNode.frequency = 255;
+ childNode.mTerminal = true;
+ childNode.mFrequency = Math.max(frequency, childNode.mFrequency);
+ if (childNode.mFrequency > 255) childNode.mFrequency = 255;
return;
}
- if (childNode.children == null) {
- childNode.children = new NodeArray();
+ if (childNode.mChildren == null) {
+ childNode.mChildren = new NodeArray();
}
- addWordRec(childNode.children, word, depth + 1, frequency, childNode);
+ addWordRec(childNode.mChildren, word, depth + 1, frequency, childNode);
}
@Override
- public void getWords(final WordComposer codes, final WordCallback callback,
- int[] nextLettersFrequencies) {
+ public void getWords(final WordComposer codes, final WordCallback callback) {
synchronized (mUpdatingLock) {
// If we need to update, start off a background task
if (mRequiresReload) startDictionaryLoadingTaskLocked();
@@ -187,7 +202,6 @@ public class ExpandableDictionary extends Dictionary {
}
mInputLength = codes.size();
- mNextLettersFrequencies = nextLettersFrequencies;
if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
// Cache the codes so that we don't have to lookup an array list
for (int i = 0; i < mInputLength; i++) {
@@ -214,9 +228,20 @@ public class ExpandableDictionary extends Dictionary {
/**
* Returns the word's frequency or -1 if not found
*/
- public int getWordFrequency(CharSequence word) {
+ protected int getWordFrequency(CharSequence word) {
+ // Case-sensitive search
Node node = searchNode(mRoots, word, 0, word.length());
- return (node == null) ? -1 : node.frequency;
+ return (node == null) ? -1 : node.mFrequency;
+ }
+
+ private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
+ // The computation itself makes sense for >= 2, but the == 2 case returns 0
+ // anyway so we may as well test against 3 instead and return the constant
+ if (inputLength >= 3) {
+ return (freq * snr * (inputLength - 2)) / (inputLength - 1);
+ } else {
+ return 0;
+ }
}
/**
@@ -232,16 +257,17 @@ public class ExpandableDictionary extends Dictionary {
* @param completion whether the traversal is now in completion mode - meaning that we've
* exhausted the input and we're looking for all possible suffixes.
* @param snr current weight of the word being formed
- * @param inputIndex position in the input characters. This can be off from the depth in
+ * @param inputIndex position in the input characters. This can be off from the depth in
* case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
* "wouldve", it could be matching "would've", so the depth will be one more than the
* inputIndex
* @param callback the callback class for adding a word
*/
- protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
+ // TODO: Share this routine with the native code for BinaryDictionary
+ protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
final int depth, boolean completion, int snr, int inputIndex, int skipPos,
WordCallback callback) {
- final int count = roots.length;
+ final int count = roots.mLength;
final int codeSize = mInputLength;
// Optimization: Prune out words that are too long compared to how much was typed.
if (depth > mMaxDepth) {
@@ -255,34 +281,36 @@ public class ExpandableDictionary extends Dictionary {
}
for (int i = 0; i < count; i++) {
- final Node node = roots.data[i];
- final char c = node.code;
+ final Node node = roots.mData[i];
+ final char c = node.mCode;
final char lowerC = toLowerCase(c);
- final boolean terminal = node.terminal;
- final NodeArray children = node.children;
- final int freq = node.frequency;
+ final boolean terminal = node.mTerminal;
+ final NodeArray children = node.mChildren;
+ final int freq = node.mFrequency;
if (completion) {
word[depth] = c;
if (terminal) {
- if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
- DataType.UNIGRAM)) {
- return;
+ final int finalFreq;
+ if (skipPos < 0) {
+ finalFreq = freq * snr;
+ } else {
+ finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
}
- // Add to frequency of next letters for predictive correction
- if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0
- && mNextLettersFrequencies.length > word[inputIndex]) {
- mNextLettersFrequencies[word[inputIndex]]++;
+ if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
+ DataType.UNIGRAM)) {
+ return;
}
}
if (children != null) {
getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
skipPos, callback);
}
- } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) {
+ } else if ((c == Keyboard.CODE_SINGLE_QUOTE
+ && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) {
// Skip the ' and continue deeper
word[depth] = c;
if (children != null) {
- getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
+ getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
skipPos, callback);
}
} else {
@@ -299,10 +327,16 @@ public class ExpandableDictionary extends Dictionary {
if (codeSize == inputIndex + 1) {
if (terminal) {
- if (INCLUDE_TYPED_WORD_IF_VALID
+ if (INCLUDE_TYPED_WORD_IF_VALID
|| !same(word, depth + 1, codes.getTypedWord())) {
- int finalFreq = freq * snr * addedAttenuation;
- if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
+ final int finalFreq;
+ if (skipPos < 0) {
+ finalFreq = freq * snr * addedAttenuation
+ * FULL_WORD_SCORE_MULTIPLIER;
+ } else {
+ finalFreq = computeSkippedWordFinalFreq(freq,
+ snr * addedAttenuation, mInputLength);
+ }
callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
DataType.UNIGRAM);
}
@@ -313,7 +347,7 @@ public class ExpandableDictionary extends Dictionary {
skipPos, callback);
}
} else if (children != null) {
- getWordsRec(children, codes, word, depth + 1,
+ getWordsRec(children, codes, word, depth + 1,
false, snr * addedAttenuation, inputIndex + 1,
skipPos, callback);
}
@@ -333,31 +367,33 @@ public class ExpandableDictionary extends Dictionary {
/**
* Adds bigrams to the in-memory trie structure that is being used to retrieve any word
- * @param frequency frequency for this bigrams
- * @param addFrequency if true, it adds to current frequency
+ * @param frequency frequency for this bigram
+ * @param addFrequency if true, it adds to current frequency, else it overwrites the old value
* @return returns the final frequency
*/
private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
- Node firstWord = searchWord(mRoots, word1, 0, null);
+ // We don't want results to be different according to case of the looked up left hand side
+ // word. We do want however to return the correct case for the right hand side.
+ // So we want to squash the case of the left hand side, and preserve that of the right
+ // hand side word.
+ Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
Node secondWord = searchWord(mRoots, word2, 0, null);
- LinkedList<NextWord> bigram = firstWord.ngrams;
+ LinkedList<NextWord> bigram = firstWord.mNGrams;
if (bigram == null || bigram.size() == 0) {
- firstWord.ngrams = new LinkedList<NextWord>();
- bigram = firstWord.ngrams;
+ firstWord.mNGrams = new LinkedList<NextWord>();
+ bigram = firstWord.mNGrams;
} else {
for (NextWord nw : bigram) {
- if (nw.word == secondWord) {
+ if (nw.mWord == secondWord) {
if (addFrequency) {
- nw.frequency += frequency;
+ return nw.addFrequency(frequency);
} else {
- nw.frequency = frequency;
+ return nw.setFrequency(frequency);
}
- return nw.frequency;
}
}
}
- NextWord nw = new NextWord(secondWord, frequency);
- firstWord.ngrams.add(nw);
+ firstWord.mNGrams.add(new NextWord(secondWord, frequency));
return frequency;
}
@@ -369,31 +405,31 @@ public class ExpandableDictionary extends Dictionary {
final int wordLength = word.length();
final char c = word.charAt(depth);
// Does children have the current character?
- final int childrenLength = children.length;
+ final int childrenLength = children.mLength;
Node childNode = null;
boolean found = false;
for (int i = 0; i < childrenLength; i++) {
- childNode = children.data[i];
- if (childNode.code == c) {
+ childNode = children.mData[i];
+ if (childNode.mCode == c) {
found = true;
break;
}
}
if (!found) {
childNode = new Node();
- childNode.code = c;
- childNode.parent = parentNode;
+ childNode.mCode = c;
+ childNode.mParent = parentNode;
children.add(childNode);
}
if (wordLength == depth + 1) {
// Terminate this word
- childNode.terminal = true;
+ childNode.mTerminal = true;
return childNode;
}
- if (childNode.children == null) {
- childNode.children = new NodeArray();
+ if (childNode.mChildren == null) {
+ childNode.mChildren = new NodeArray();
}
- return searchWord(childNode.children, word, depth + 1, childNode);
+ return searchWord(childNode.mChildren, word, depth + 1, childNode);
}
// @VisibleForTesting
@@ -406,18 +442,22 @@ public class ExpandableDictionary extends Dictionary {
}
}
- private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) {
- Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length());
- if (prevWord != null && prevWord.ngrams != null) {
- reverseLookUp(prevWord.ngrams, callback);
+ private void runBigramReverseLookUp(final CharSequence previousWord,
+ final WordCallback callback) {
+ // Search for the lowercase version of the word only, because that's where bigrams
+ // store their sons.
+ Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0,
+ previousWord.length());
+ if (prevWord != null && prevWord.mNGrams != null) {
+ reverseLookUp(prevWord.mNGrams, callback);
}
}
@Override
public void getBigrams(final WordComposer codes, final CharSequence previousWord,
- final WordCallback callback, int[] nextLettersFrequencies) {
+ final WordCallback callback) {
if (!reloadDictionaryIfRequired()) {
- runReverseLookUp(previousWord, callback);
+ runBigramReverseLookUp(previousWord, callback);
}
}
@@ -430,10 +470,14 @@ public class ExpandableDictionary extends Dictionary {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
+ //
}
}
}
+ // Local to reverseLookUp, but do not allocate each time.
+ private final char[] mLookedUpString = new char[MAX_WORD_LENGTH];
+
/**
* reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
* through callback.
@@ -444,42 +488,45 @@ public class ExpandableDictionary extends Dictionary {
Node node;
int freq;
for (NextWord nextWord : terminalNodes) {
- node = nextWord.word;
- freq = nextWord.frequency;
- // TODO Not the best way to limit suggestion threshold
- if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) {
- sb.setLength(0);
- do {
- sb.insert(0, node.code);
- node = node.parent;
- } while(node != null);
-
- // TODO better way to feed char array?
- callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
- DataType.BIGRAM);
- }
+ node = nextWord.mWord;
+ freq = nextWord.getFrequency();
+ int index = MAX_WORD_LENGTH;
+ do {
+ --index;
+ mLookedUpString[index] = node.mCode;
+ node = node.mParent;
+ } while (node != null);
+
+ callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId,
+ DataType.BIGRAM);
}
}
/**
- * Search for the terminal node of the word
+ * Recursively search for the terminal node of the word.
+ *
+ * One iteration takes the full word to search for and the current index of the recursion.
+ *
+ * @param children the node of the trie to search under.
+ * @param word the word to search for. Only read [offset..length] so there may be trailing chars
+ * @param offset the index in {@code word} this recursion should operate on.
+ * @param length the length of the input word.
* @return Returns the terminal node of the word if the word exists
*/
private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
final int length) {
- // TODO Consider combining with addWordRec
- final int count = children.length;
- char currentChar = word.charAt(offset);
+ final int count = children.mLength;
+ final char currentChar = word.charAt(offset);
for (int j = 0; j < count; j++) {
- final Node node = children.data[j];
- if (node.code == currentChar) {
+ final Node node = children.mData[j];
+ if (node.mCode == currentChar) {
if (offset == length - 1) {
- if (node.terminal) {
+ if (node.mTerminal) {
return node;
}
} else {
- if (node.children != null) {
- Node returnNode = searchNode(node.children, word, offset + 1, length);
+ if (node.mChildren != null) {
+ Node returnNode = searchNode(node.mChildren, word, offset + 1, length);
if (returnNode != null) return returnNode;
}
}
@@ -503,16 +550,17 @@ public class ExpandableDictionary extends Dictionary {
}
}
- static char toLowerCase(char c) {
+ private static char toLowerCase(char c) {
+ char baseChar = c;
if (c < BASE_CHARS.length) {
- c = BASE_CHARS[c];
+ baseChar = BASE_CHARS[c];
}
- if (c >= 'A' && c <= 'Z') {
- c = (char) (c | 32);
- } else if (c > 127) {
- c = Character.toLowerCase(c);
+ if (baseChar >= 'A' && baseChar <= 'Z') {
+ return (char)(baseChar | 32);
+ } else if (baseChar > 127) {
+ return Character.toLowerCase(baseChar);
}
- return c;
+ return baseChar;
}
/**
@@ -521,7 +569,7 @@ public class ExpandableDictionary extends Dictionary {
* if c is not a combined character, or the base character if it
* is combined.
*/
- static final char BASE_CHARS[] = {
+ private static final char BASE_CHARS[] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,