1 files changed, 178 insertions, 130 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index e954c0818..97a4a1816 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -16,11 +16,13 @@
 
 package com.android.inputmethod.latin;
 
-import java.util.LinkedList;
-
 import android.content.Context;
 import android.os.AsyncTask;
 
+import com.android.inputmethod.keyboard.Keyboard;
+
+import java.util.LinkedList;
+
 /**
  * Base class for an in-memory dictionary that can grow dynamically and can
  * be searched for suggestions and valid words.
@@ -32,15 +34,14 @@ public class ExpandableDictionary extends Dictionary {
      */
     protected static final int MAX_WORD_LENGTH = 32;
 
+    // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8.
+    protected static final int BIGRAM_MAX_FREQUENCY = 255;
+
     private Context mContext;
     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
     private int mDicTypeId;
     private int mMaxDepth;
     private int mInputLength;
-    private int[] mNextLettersFrequencies;
-    private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
-
-    private static final char QUOTE = '\'';
 
     private boolean mRequiresReload;
 
@@ -49,53 +50,66 @@ public class ExpandableDictionary extends Dictionary {
     // Use this lock before touching mUpdatingDictionary & mRequiresDownload
     private Object mUpdatingLock = new Object();
 
-    static class Node {
-        char code;
-        int frequency;
-        boolean terminal;
-        Node parent;
-        NodeArray children;
-        LinkedList<NextWord> ngrams; // Supports ngram
+    private static class Node {
+        char mCode;
+        int mFrequency;
+        boolean mTerminal;
+        Node mParent;
+        NodeArray mChildren;
+        LinkedList<NextWord> mNGrams; // Supports ngram
     }
 
-    static class NodeArray {
-        Node[] data;
-        int length = 0;
+    private static class NodeArray {
+        Node[] mData;
+        int mLength = 0;
         private static final int INCREMENT = 2;
 
         NodeArray() {
-            data = new Node[INCREMENT];
+            mData = new Node[INCREMENT];
         }
 
         void add(Node n) {
-            if (length + 1 > data.length) {
-                Node[] tempData = new Node[length + INCREMENT];
-                if (length > 0) {
-                    System.arraycopy(data, 0, tempData, 0, length);
+            if (mLength + 1 > mData.length) {
+                Node[] tempData = new Node[mLength + INCREMENT];
+                if (mLength > 0) {
+                    System.arraycopy(mData, 0, tempData, 0, mLength);
                 }
-                data = tempData;
+                mData = tempData;
             }
-            data[length++] = n;
+            mData[mLength++] = n;
         }
     }
 
-    static class NextWord {
-        Node word;
-        NextWord nextWord;
-        int frequency;
+    private static class NextWord {
+        public final Node mWord;
+        private int mFrequency;
 
-        NextWord(Node word, int frequency) {
-            this.word = word;
-            this.frequency = frequency;
+        public NextWord(Node word, int frequency) {
+            mWord = word;
+            mFrequency = frequency;
+        }
+
+        public int getFrequency() {
+            return mFrequency;
         }
-    }
 
+        public int setFrequency(int freq) {
+            mFrequency = freq;
+            return mFrequency;
+        }
+
+        public int addFrequency(int add) {
+            mFrequency += add;
+            if (mFrequency > BIGRAM_MAX_FREQUENCY) mFrequency = BIGRAM_MAX_FREQUENCY;
+            return mFrequency;
+        }
+    }
 
     private NodeArray mRoots;
 
     private int[][] mCodes;
 
-    ExpandableDictionary(Context context, int dicTypeId) {
+    public ExpandableDictionary(Context context, int dicTypeId) {
         mContext = context;
         clearDictionary();
         mCodes = new int[MAX_WORD_LENGTH][];
@@ -128,13 +142,14 @@ public class ExpandableDictionary extends Dictionary {
 
     /** Override to load your dictionary here, on a background thread. */
     public void loadDictionaryAsync() {
+        // empty base implementation
     }
 
-    Context getContext() {
+    public Context getContext() {
         return mContext;
     }
-    
-    int getMaxWordLength() {
+
+    public int getMaxWordLength() {
         return MAX_WORD_LENGTH;
     }
 
@@ -145,40 +160,40 @@ public class ExpandableDictionary extends Dictionary {
     private void addWordRec(NodeArray children, final String word, final int depth,
             final int frequency, Node parentNode) {
         final int wordLength = word.length();
+        if (wordLength <= depth) return;
         final char c = word.charAt(depth);
         // Does children have the current character?
-        final int childrenLength = children.length;
+        final int childrenLength = children.mLength;
         Node childNode = null;
         boolean found = false;
         for (int i = 0; i < childrenLength; i++) {
-            childNode = children.data[i];
-            if (childNode.code == c) {
+            childNode = children.mData[i];
+            if (childNode.mCode == c) {
                 found = true;
                 break;
             }
         }
         if (!found) {
             childNode = new Node();
-            childNode.code = c;
-            childNode.parent = parentNode;
+            childNode.mCode = c;
+            childNode.mParent = parentNode;
             children.add(childNode);
         }
         if (wordLength == depth + 1) {
             // Terminate this word
-            childNode.terminal = true;
-            childNode.frequency = Math.max(frequency, childNode.frequency);
-            if (childNode.frequency > 255) childNode.frequency = 255;
+            childNode.mTerminal = true;
+            childNode.mFrequency = Math.max(frequency, childNode.mFrequency);
+            if (childNode.mFrequency > 255) childNode.mFrequency = 255;
             return;
         }
-        if (childNode.children == null) {
-            childNode.children = new NodeArray();
+        if (childNode.mChildren == null) {
+            childNode.mChildren = new NodeArray();
         }
-        addWordRec(childNode.children, word, depth + 1, frequency, childNode);
+        addWordRec(childNode.mChildren, word, depth + 1, frequency, childNode);
     }
 
     @Override
-    public void getWords(final WordComposer codes, final WordCallback callback,
-            int[] nextLettersFrequencies) {
+    public void getWords(final WordComposer codes, final WordCallback callback) {
         synchronized (mUpdatingLock) {
             // If we need to update, start off a background task
             if (mRequiresReload) startDictionaryLoadingTaskLocked();
@@ -187,7 +202,6 @@ public class ExpandableDictionary extends Dictionary {
         }
 
         mInputLength = codes.size();
-        mNextLettersFrequencies = nextLettersFrequencies;
         if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
         // Cache the codes so that we don't have to lookup an array list
         for (int i = 0; i < mInputLength; i++) {
@@ -214,9 +228,20 @@ public class ExpandableDictionary extends Dictionary {
     /**
      * Returns the word's frequency or -1 if not found
      */
-    public int getWordFrequency(CharSequence word) {
+    protected int getWordFrequency(CharSequence word) {
+        // Case-sensitive search
         Node node = searchNode(mRoots, word, 0, word.length());
-        return (node == null) ? -1 : node.frequency;
+        return (node == null) ? -1 : node.mFrequency;
+    }
+
+    private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
+        // The computation itself makes sense for >= 2, but the == 2 case returns 0
+        // anyway so we may as well test against 3 instead and return the constant
+        if (inputLength >= 3) {
+            return (freq * snr * (inputLength - 2)) / (inputLength - 1);
+        } else {
+            return 0;
+        }
     }
 
     /**
@@ -232,16 +257,17 @@ public class ExpandableDictionary extends Dictionary {
      * @param completion whether the traversal is now in completion mode - meaning that we've
      * exhausted the input and we're looking for all possible suffixes.
      * @param snr current weight of the word being formed
-     * @param inputIndex position in the input characters. This can be off from the depth in 
+     * @param inputIndex position in the input characters. This can be off from the depth in
      * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
      * "wouldve", it could be matching "would've", so the depth will be one more than the
      * inputIndex
      * @param callback the callback class for adding a word
      */
-    protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 
+    // TODO: Share this routine with the native code for BinaryDictionary
+    protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
             final int depth, boolean completion, int snr, int inputIndex, int skipPos,
             WordCallback callback) {
-        final int count = roots.length;
+        final int count = roots.mLength;
         final int codeSize = mInputLength;
         // Optimization: Prune out words that are too long compared to how much was typed.
         if (depth > mMaxDepth) {
@@ -255,34 +281,36 @@ public class ExpandableDictionary extends Dictionary {
         }
 
         for (int i = 0; i < count; i++) {
-            final Node node = roots.data[i];
-            final char c = node.code;
+            final Node node = roots.mData[i];
+            final char c = node.mCode;
             final char lowerC = toLowerCase(c);
-            final boolean terminal = node.terminal;
-            final NodeArray children = node.children;
-            final int freq = node.frequency;
+            final boolean terminal = node.mTerminal;
+            final NodeArray children = node.mChildren;
+            final int freq = node.mFrequency;
             if (completion) {
                 word[depth] = c;
                 if (terminal) {
-                    if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
-                                DataType.UNIGRAM)) {
-                        return;
+                    final int finalFreq;
+                    if (skipPos < 0) {
+                        finalFreq = freq * snr;
+                    } else {
+                        finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
                     }
-                    // Add to frequency of next letters for predictive correction
-                    if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0
-                            && mNextLettersFrequencies.length > word[inputIndex]) {
-                        mNextLettersFrequencies[word[inputIndex]]++;
+                    if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
+                            DataType.UNIGRAM)) {
+                        return;
                     }
                 }
                 if (children != null) {
                     getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
                             skipPos, callback);
                 }
-            } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) {
+            } else if ((c == Keyboard.CODE_SINGLE_QUOTE
+                    && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) {
                 // Skip the ' and continue deeper
                 word[depth] = c;
                 if (children != null) {
-                    getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 
+                    getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
                             skipPos, callback);
                 }
             } else {
@@ -299,10 +327,16 @@ public class ExpandableDictionary extends Dictionary {
 
                         if (codeSize == inputIndex + 1) {
                             if (terminal) {
-                                if (INCLUDE_TYPED_WORD_IF_VALID 
+                                if (INCLUDE_TYPED_WORD_IF_VALID
                                         || !same(word, depth + 1, codes.getTypedWord())) {
-                                    int finalFreq = freq * snr * addedAttenuation;
-                                    if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
+                                    final int finalFreq;
+                                    if (skipPos < 0) {
+                                        finalFreq = freq * snr * addedAttenuation
+                                                * FULL_WORD_SCORE_MULTIPLIER;
+                                    } else {
+                                        finalFreq = computeSkippedWordFinalFreq(freq,
+                                                snr * addedAttenuation, mInputLength);
+                                    }
                                     callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
                                             DataType.UNIGRAM);
                                 }
@@ -313,7 +347,7 @@ public class ExpandableDictionary extends Dictionary {
                                         skipPos, callback);
                             }
                         } else if (children != null) {
-                            getWordsRec(children, codes, word, depth + 1, 
+                            getWordsRec(children, codes, word, depth + 1,
                                     false, snr * addedAttenuation, inputIndex + 1,
                                     skipPos, callback);
                         }
@@ -333,31 +367,33 @@ public class ExpandableDictionary extends Dictionary {
 
     /**
      * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
-     * @param frequency frequency for this bigrams
-     * @param addFrequency if true, it adds to current frequency
+     * @param frequency frequency for this bigram
+     * @param addFrequency if true, it adds to current frequency, else it overwrites the old value
      * @return returns the final frequency
      */
     private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
-        Node firstWord = searchWord(mRoots, word1, 0, null);
+        // We don't want results to be different according to case of the looked up left hand side
+        // word. We do want however to return the correct case for the right hand side.
+        // So we want to squash the case of the left hand side, and preserve that of the right
+        // hand side word.
+        Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null);
         Node secondWord = searchWord(mRoots, word2, 0, null);
-        LinkedList<NextWord> bigram = firstWord.ngrams;
+        LinkedList<NextWord> bigram = firstWord.mNGrams;
         if (bigram == null || bigram.size() == 0) {
-            firstWord.ngrams = new LinkedList<NextWord>();
-            bigram = firstWord.ngrams;
+            firstWord.mNGrams = new LinkedList<NextWord>();
+            bigram = firstWord.mNGrams;
         } else {
             for (NextWord nw : bigram) {
-                if (nw.word == secondWord) {
+                if (nw.mWord == secondWord) {
                     if (addFrequency) {
-                        nw.frequency += frequency;
+                        return nw.addFrequency(frequency);
                     } else {
-                        nw.frequency = frequency;
+                        return nw.setFrequency(frequency);
                     }
-                    return nw.frequency;
                 }
             }
         }
-        NextWord nw = new NextWord(secondWord, frequency);
-        firstWord.ngrams.add(nw);
+        firstWord.mNGrams.add(new NextWord(secondWord, frequency));
         return frequency;
     }
 
@@ -369,31 +405,31 @@ public class ExpandableDictionary extends Dictionary {
         final int wordLength = word.length();
         final char c = word.charAt(depth);
         // Does children have the current character?
-        final int childrenLength = children.length;
+        final int childrenLength = children.mLength;
         Node childNode = null;
         boolean found = false;
         for (int i = 0; i < childrenLength; i++) {
-            childNode = children.data[i];
-            if (childNode.code == c) {
+            childNode = children.mData[i];
+            if (childNode.mCode == c) {
                 found = true;
                 break;
             }
         }
         if (!found) {
             childNode = new Node();
-            childNode.code = c;
-            childNode.parent = parentNode;
+            childNode.mCode = c;
+            childNode.mParent = parentNode;
             children.add(childNode);
         }
         if (wordLength == depth + 1) {
             // Terminate this word
-            childNode.terminal = true;
+            childNode.mTerminal = true;
             return childNode;
         }
-        if (childNode.children == null) {
-            childNode.children = new NodeArray();
+        if (childNode.mChildren == null) {
+            childNode.mChildren = new NodeArray();
         }
-        return searchWord(childNode.children, word, depth + 1, childNode);
+        return searchWord(childNode.mChildren, word, depth + 1, childNode);
     }
 
     // @VisibleForTesting
@@ -406,18 +442,22 @@ public class ExpandableDictionary extends Dictionary {
         }
     }
 
-    private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) {
-        Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length());
-        if (prevWord != null && prevWord.ngrams != null) {
-            reverseLookUp(prevWord.ngrams, callback);
+    private void runBigramReverseLookUp(final CharSequence previousWord,
+            final WordCallback callback) {
+        // Search for the lowercase version of the word only, because that's where bigrams
+        // store their sons.
+        Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0,
+                previousWord.length());
+        if (prevWord != null && prevWord.mNGrams != null) {
+            reverseLookUp(prevWord.mNGrams, callback);
         }
     }
 
     @Override
     public void getBigrams(final WordComposer codes, final CharSequence previousWord,
-            final WordCallback callback, int[] nextLettersFrequencies) {
+            final WordCallback callback) {
         if (!reloadDictionaryIfRequired()) {
-            runReverseLookUp(previousWord, callback);
+            runBigramReverseLookUp(previousWord, callback);
         }
     }
 
@@ -430,10 +470,14 @@ public class ExpandableDictionary extends Dictionary {
             try {
                 Thread.sleep(100);
             } catch (InterruptedException e) {
+                //
             }
         }
     }
 
+    // Local to reverseLookUp, but do not allocate each time.
+    private final char[] mLookedUpString = new char[MAX_WORD_LENGTH];
+
     /**
      * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
      * through callback.
@@ -444,42 +488,45 @@ public class ExpandableDictionary extends Dictionary {
         Node node;
         int freq;
         for (NextWord nextWord : terminalNodes) {
-            node = nextWord.word;
-            freq = nextWord.frequency;
-            // TODO Not the best way to limit suggestion threshold
-            if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) {
-                sb.setLength(0);
-                do {
-                    sb.insert(0, node.code);
-                    node = node.parent;
-                } while(node != null);
-
-                // TODO better way to feed char array?
-                callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
-                        DataType.BIGRAM);
-            }
+            node = nextWord.mWord;
+            freq = nextWord.getFrequency();
+            int index = MAX_WORD_LENGTH;
+            do {
+                --index;
+                mLookedUpString[index] = node.mCode;
+                node = node.mParent;
+            } while (node != null);
+
+            callback.addWord(mLookedUpString, index, MAX_WORD_LENGTH - index, freq, mDicTypeId,
+                    DataType.BIGRAM);
         }
     }
 
     /**
-     * Search for the terminal node of the word
+     * Recursively search for the terminal node of the word.
+     *
+     * One iteration takes the full word to search for and the current index of the recursion.
+     *
+     * @param children the node of the trie to search under.
+     * @param word the word to search for. Only read [offset..length] so there may be trailing chars
+     * @param offset the index in {@code word} this recursion should operate on.
+     * @param length the length of the input word.
      * @return Returns the terminal node of the word if the word exists
      */
     private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
             final int length) {
-        // TODO Consider combining with addWordRec
-        final int count = children.length;
-        char currentChar = word.charAt(offset);
+        final int count = children.mLength;
+        final char currentChar = word.charAt(offset);
         for (int j = 0; j < count; j++) {
-            final Node node = children.data[j];
-            if (node.code == currentChar) {
+            final Node node = children.mData[j];
+            if (node.mCode == currentChar) {
                 if (offset == length - 1) {
-                    if (node.terminal) {
+                    if (node.mTerminal) {
                         return node;
                     }
                 } else {
-                    if (node.children != null) {
-                        Node returnNode = searchNode(node.children, word, offset + 1, length);
+                    if (node.mChildren != null) {
+                        Node returnNode = searchNode(node.mChildren, word, offset + 1, length);
                         if (returnNode != null) return returnNode;
                     }
                 }
@@ -503,16 +550,17 @@ public class ExpandableDictionary extends Dictionary {
         }
     }
 
-    static char toLowerCase(char c) {
+    private static char toLowerCase(char c) {
+        char baseChar = c;
         if (c < BASE_CHARS.length) {
-            c = BASE_CHARS[c];
+            baseChar = BASE_CHARS[c];
         }
-        if (c >= 'A' && c <= 'Z') {
-            c = (char) (c | 32);
-        } else if (c > 127) {
-            c = Character.toLowerCase(c);
+        if (baseChar >= 'A' && baseChar <= 'Z') {
+            return (char)(baseChar | 32);
+        } else if (baseChar > 127) {
+            return Character.toLowerCase(baseChar);
         }
-        return c;
+        return baseChar;
     }
 
     /**
@@ -521,7 +569,7 @@ public class ExpandableDictionary extends Dictionary {
      * if c is not a combined character, or the base character if it
      * is combined.
      */
-    static final char BASE_CHARS[] = {
+    private static final char BASE_CHARS[] = {
         0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 
         0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 
         0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,