diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/ExpandableDictionary.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/ExpandableDictionary.java | 193 |
1 files changed, 160 insertions, 33 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index d8a9547c1..53f9ed8c8 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -16,22 +16,32 @@ package com.android.inputmethod.latin; +import java.util.LinkedList; + import android.content.Context; import android.os.AsyncTask; +import android.os.SystemClock; +import android.util.Log; /** * Base class for an in-memory dictionary that can grow dynamically and can * be searched for suggestions and valid words. */ public class ExpandableDictionary extends Dictionary { + /** + * There is difference between what java and native code can handle. + * It uses 32 because Java stack overflows when greater value is used. + */ + protected static final int MAX_WORD_LENGTH = 32; + private Context mContext; private char[] mWordBuilder = new char[MAX_WORD_LENGTH]; private int mDicTypeId; private int mMaxDepth; private int mInputLength; private int[] mNextLettersFrequencies; + private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH); - public static final int MAX_WORD_LENGTH = 32; private static final char QUOTE = '\''; private boolean mRequiresReload; @@ -45,7 +55,9 @@ public class ExpandableDictionary extends Dictionary { char code; int frequency; boolean terminal; + Node parent; NodeArray children; + LinkedList<NextWord> ngrams; // Supports ngram } static class NodeArray { @@ -69,6 +81,18 @@ public class ExpandableDictionary extends Dictionary { } } + static class NextWord { + Node word; + NextWord nextWord; + int frequency; + + NextWord(Node word, int frequency) { + this.word = word; + this.frequency = frequency; + } + } + + private NodeArray mRoots; private int[][] mCodes; @@ -117,12 +141,11 @@ public class ExpandableDictionary extends Dictionary { } public void addWord(String word, int frequency) { - addWordRec(mRoots, word, 0, frequency); + addWordRec(mRoots, word, 0, frequency, null); } - private void addWordRec(NodeArray children, final String word, - final int depth, final int frequency) { - + private void addWordRec(NodeArray children, final String word, final int depth, + final int frequency, Node parentNode) { final int wordLength = word.length(); final char c = word.charAt(depth); // Does children have the current character? @@ -139,6 +162,7 @@ public class ExpandableDictionary extends Dictionary { if (!found) { childNode = new Node(); childNode.code = c; + childNode.parent = parentNode; children.add(childNode); } if (wordLength == depth + 1) { @@ -151,7 +175,7 @@ public class ExpandableDictionary extends Dictionary { if (childNode.children == null) { childNode.children = new NodeArray(); } - addWordRec(childNode.children, word, depth + 1, frequency); + addWordRec(childNode.children, word, depth + 1, frequency, childNode); } @Override @@ -185,7 +209,7 @@ public class ExpandableDictionary extends Dictionary { if (mRequiresReload) startDictionaryLoadingTaskLocked(); if (mUpdatingDictionary) return false; } - final int freq = getWordFrequencyRec(mRoots, word, 0, word.length()); + final int freq = getWordFrequency(word); return freq > -1; } @@ -193,32 +217,8 @@ public class ExpandableDictionary extends Dictionary { * Returns the word's frequency or -1 if not found */ public int getWordFrequency(CharSequence word) { - return getWordFrequencyRec(mRoots, word, 0, word.length()); - } - - /** - * Returns the word's frequency or -1 if not found - */ - private int getWordFrequencyRec(final NodeArray children, final CharSequence word, - final int offset, final int length) { - final int count = children.length; - char currentChar = word.charAt(offset); - for (int j = 0; j < count; j++) { - final Node node = children.data[j]; - if (node.code == currentChar) { - if (offset == length - 1) { - if (node.terminal) { - return node.frequency; - } - } else { - if (node.children != null) { - int freq = getWordFrequencyRec(node.children, word, offset + 1, length); - if (freq > -1) return freq; - } - } - } - } - return -1; + Node node = searchNode(mRoots, word, 0, word.length()); + return (node == null) ? -1 : node.frequency; } /** @@ -325,6 +325,133 @@ public class ExpandableDictionary extends Dictionary { } } + /** + * Adds bigrams to the in-memory trie structure that is being used to retrieve any word + * @param addFrequency adding frequency of the pair + * @return returns the final frequency + */ + protected int addBigrams(String word1, String word2, int addFrequency) { + Node firstWord = searchWord(mRoots, word1, 0, null); + Node secondWord = searchWord(mRoots, word2, 0, null); + LinkedList<NextWord> bigram = firstWord.ngrams; + if (bigram == null || bigram.size() == 0) { + firstWord.ngrams = new LinkedList<NextWord>(); + bigram = firstWord.ngrams; + } else { + for (NextWord nw : bigram) { + if (nw.word == secondWord) { + nw.frequency += addFrequency; + return nw.frequency; + } + } + } + NextWord nw = new NextWord(secondWord, addFrequency); + firstWord.ngrams.add(nw); + return addFrequency; + } + + /** + * Searches for the word and add the word if it does not exist. + * @return Returns the terminal node of the word we are searching for. + */ + private Node searchWord(NodeArray children, String word, int depth, Node parentNode) { + final int wordLength = word.length(); + final char c = word.charAt(depth); + // Does children have the current character? + final int childrenLength = children.length; + Node childNode = null; + boolean found = false; + for (int i = 0; i < childrenLength; i++) { + childNode = children.data[i]; + if (childNode.code == c) { + found = true; + break; + } + } + if (!found) { + childNode = new Node(); + childNode.code = c; + childNode.parent = parentNode; + children.add(childNode); + } + if (wordLength == depth + 1) { + // Terminate this word + childNode.terminal = true; + return childNode; + } + if (childNode.children == null) { + childNode.children = new NodeArray(); + } + return searchWord(childNode.children, word, depth + 1, childNode); + } + + @Override + public void getBigrams(final WordComposer codes, final CharSequence previousWord, + final WordCallback callback, int[] nextLettersFrequencies) { + synchronized (mUpdatingLock) { + // If we need to update, start off a background task + if (mRequiresReload) startDictionaryLoadingTaskLocked(); + // Currently updating contacts, don't return any results. + if (mUpdatingDictionary) return; + } + + Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length()); + if (prevWord != null && prevWord.ngrams != null) { + reverseLookUp(prevWord.ngrams, callback); + } + } + + /** + * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words + * through callback. + * @param terminalNodes list of terminal nodes we want to add + */ + private void reverseLookUp(LinkedList<NextWord> terminalNodes, + final WordCallback callback) { + Node node; + int freq; + for (NextWord nextWord : terminalNodes) { + node = nextWord.word; + freq = nextWord.frequency; + sb.setLength(0); + do { + sb.insert(0, node.code); + node = node.parent; + } while(node != null); + + // TODO better way to feed char array? + callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId, + DataType.BIGRAM); + } + } + + /** + * Search for the terminal node of the word + * @return Returns the terminal node of the word if the word exists + */ + private Node searchNode(final NodeArray children, final CharSequence word, final int offset, + final int length) { + // TODO Consider combining with addWordRec + final int count = children.length; + char currentChar = word.charAt(offset); + for (int j = 0; j < count; j++) { + final Node node = children.data[j]; + if (node.code == currentChar) { + if (offset == length - 1) { + if (node.terminal) { + return node; + } + } else { + if (node.children != null) { + Node returnNode = searchNode(node.children, word, offset + 1, length); + if (returnNode != null) return returnNode; + } + } + } + } + return null; + } + protected void clearDictionary() { mRoots = new NodeArray(); } |