aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
diff options
context:
space:
mode:
authorJae Yong Sung <jysung@google.com>2010-07-26 11:43:29 -0700
committerJae Yong Sung <jysung@google.com>2010-07-28 11:08:08 -0700
commit80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64 (patch)
tree384655d5c7207325014888fd26da1bc7188db66e /java/src/com/android/inputmethod/latin/ExpandableDictionary.java
parent679b838b05a70ed965017635efdf536449aa230f (diff)
downloadlatinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.tar.gz
latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.tar.xz
latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.zip
- separate dict (uses xml)
- retrieve bigrams that only starts with character typed and neighbor keys - contacts bigram - performance measure bug: 2873133 Change-Id: If97c005b18c82f3fafef50009dd2dfd972b0ab8f
Diffstat (limited to 'java/src/com/android/inputmethod/latin/ExpandableDictionary.java')
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableDictionary.java193
1 files changed, 160 insertions, 33 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index d8a9547c1..53f9ed8c8 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -16,22 +16,32 @@
package com.android.inputmethod.latin;
+import java.util.LinkedList;
+
import android.content.Context;
import android.os.AsyncTask;
+import android.os.SystemClock;
+import android.util.Log;
/**
* Base class for an in-memory dictionary that can grow dynamically and can
* be searched for suggestions and valid words.
*/
public class ExpandableDictionary extends Dictionary {
+ /**
+ * There is difference between what java and native code can handle.
+ * It uses 32 because Java stack overflows when greater value is used.
+ */
+ protected static final int MAX_WORD_LENGTH = 32;
+
private Context mContext;
private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
private int mDicTypeId;
private int mMaxDepth;
private int mInputLength;
private int[] mNextLettersFrequencies;
+ private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
- public static final int MAX_WORD_LENGTH = 32;
private static final char QUOTE = '\'';
private boolean mRequiresReload;
@@ -45,7 +55,9 @@ public class ExpandableDictionary extends Dictionary {
char code;
int frequency;
boolean terminal;
+ Node parent;
NodeArray children;
+ LinkedList<NextWord> ngrams; // Supports ngram
}
static class NodeArray {
@@ -69,6 +81,18 @@ public class ExpandableDictionary extends Dictionary {
}
}
+ static class NextWord {
+ Node word;
+ NextWord nextWord;
+ int frequency;
+
+ NextWord(Node word, int frequency) {
+ this.word = word;
+ this.frequency = frequency;
+ }
+ }
+
+
private NodeArray mRoots;
private int[][] mCodes;
@@ -117,12 +141,11 @@ public class ExpandableDictionary extends Dictionary {
}
public void addWord(String word, int frequency) {
- addWordRec(mRoots, word, 0, frequency);
+ addWordRec(mRoots, word, 0, frequency, null);
}
- private void addWordRec(NodeArray children, final String word,
- final int depth, final int frequency) {
-
+ private void addWordRec(NodeArray children, final String word, final int depth,
+ final int frequency, Node parentNode) {
final int wordLength = word.length();
final char c = word.charAt(depth);
// Does children have the current character?
@@ -139,6 +162,7 @@ public class ExpandableDictionary extends Dictionary {
if (!found) {
childNode = new Node();
childNode.code = c;
+ childNode.parent = parentNode;
children.add(childNode);
}
if (wordLength == depth + 1) {
@@ -151,7 +175,7 @@ public class ExpandableDictionary extends Dictionary {
if (childNode.children == null) {
childNode.children = new NodeArray();
}
- addWordRec(childNode.children, word, depth + 1, frequency);
+ addWordRec(childNode.children, word, depth + 1, frequency, childNode);
}
@Override
@@ -185,7 +209,7 @@ public class ExpandableDictionary extends Dictionary {
if (mRequiresReload) startDictionaryLoadingTaskLocked();
if (mUpdatingDictionary) return false;
}
- final int freq = getWordFrequencyRec(mRoots, word, 0, word.length());
+ final int freq = getWordFrequency(word);
return freq > -1;
}
@@ -193,32 +217,8 @@ public class ExpandableDictionary extends Dictionary {
* Returns the word's frequency or -1 if not found
*/
public int getWordFrequency(CharSequence word) {
- return getWordFrequencyRec(mRoots, word, 0, word.length());
- }
-
- /**
- * Returns the word's frequency or -1 if not found
- */
- private int getWordFrequencyRec(final NodeArray children, final CharSequence word,
- final int offset, final int length) {
- final int count = children.length;
- char currentChar = word.charAt(offset);
- for (int j = 0; j < count; j++) {
- final Node node = children.data[j];
- if (node.code == currentChar) {
- if (offset == length - 1) {
- if (node.terminal) {
- return node.frequency;
- }
- } else {
- if (node.children != null) {
- int freq = getWordFrequencyRec(node.children, word, offset + 1, length);
- if (freq > -1) return freq;
- }
- }
- }
- }
- return -1;
+ Node node = searchNode(mRoots, word, 0, word.length());
+ return (node == null) ? -1 : node.frequency;
}
/**
@@ -325,6 +325,133 @@ public class ExpandableDictionary extends Dictionary {
}
}
+ /**
+ * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
+ * @param addFrequency adding frequency of the pair
+ * @return returns the final frequency
+ */
+ protected int addBigrams(String word1, String word2, int addFrequency) {
+ Node firstWord = searchWord(mRoots, word1, 0, null);
+ Node secondWord = searchWord(mRoots, word2, 0, null);
+ LinkedList<NextWord> bigram = firstWord.ngrams;
+ if (bigram == null || bigram.size() == 0) {
+ firstWord.ngrams = new LinkedList<NextWord>();
+ bigram = firstWord.ngrams;
+ } else {
+ for (NextWord nw : bigram) {
+ if (nw.word == secondWord) {
+ nw.frequency += addFrequency;
+ return nw.frequency;
+ }
+ }
+ }
+ NextWord nw = new NextWord(secondWord, addFrequency);
+ firstWord.ngrams.add(nw);
+ return addFrequency;
+ }
+
+ /**
+ * Searches for the word and add the word if it does not exist.
+ * @return Returns the terminal node of the word we are searching for.
+ */
+ private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
+ final int wordLength = word.length();
+ final char c = word.charAt(depth);
+ // Does children have the current character?
+ final int childrenLength = children.length;
+ Node childNode = null;
+ boolean found = false;
+ for (int i = 0; i < childrenLength; i++) {
+ childNode = children.data[i];
+ if (childNode.code == c) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ childNode = new Node();
+ childNode.code = c;
+ childNode.parent = parentNode;
+ children.add(childNode);
+ }
+ if (wordLength == depth + 1) {
+ // Terminate this word
+ childNode.terminal = true;
+ return childNode;
+ }
+ if (childNode.children == null) {
+ childNode.children = new NodeArray();
+ }
+ return searchWord(childNode.children, word, depth + 1, childNode);
+ }
+
+ @Override
+ public void getBigrams(final WordComposer codes, final CharSequence previousWord,
+ final WordCallback callback, int[] nextLettersFrequencies) {
+ synchronized (mUpdatingLock) {
+ // If we need to update, start off a background task
+ if (mRequiresReload) startDictionaryLoadingTaskLocked();
+ // Currently updating contacts, don't return any results.
+ if (mUpdatingDictionary) return;
+ }
+
+ Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length());
+ if (prevWord != null && prevWord.ngrams != null) {
+ reverseLookUp(prevWord.ngrams, callback);
+ }
+ }
+
+ /**
+ * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
+ * through callback.
+ * @param terminalNodes list of terminal nodes we want to add
+ */
+ private void reverseLookUp(LinkedList<NextWord> terminalNodes,
+ final WordCallback callback) {
+ Node node;
+ int freq;
+ for (NextWord nextWord : terminalNodes) {
+ node = nextWord.word;
+ freq = nextWord.frequency;
+ sb.setLength(0);
+ do {
+ sb.insert(0, node.code);
+ node = node.parent;
+ } while(node != null);
+
+ // TODO better way to feed char array?
+ callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
+ DataType.BIGRAM);
+ }
+ }
+
+ /**
+ * Search for the terminal node of the word
+ * @return Returns the terminal node of the word if the word exists
+ */
+ private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
+ final int length) {
+ // TODO Consider combining with addWordRec
+ final int count = children.length;
+ char currentChar = word.charAt(offset);
+ for (int j = 0; j < count; j++) {
+ final Node node = children.data[j];
+ if (node.code == currentChar) {
+ if (offset == length - 1) {
+ if (node.terminal) {
+ return node;
+ }
+ } else {
+ if (node.children != null) {
+ Node returnNode = searchNode(node.children, word, offset + 1, length);
+ if (returnNode != null) return returnNode;
+ }
+ }
+ }
+ }
+ return null;
+ }
+
protected void clearDictionary() {
mRoots = new NodeArray();
}