- separate dict (uses xml)

- retrieve only bigrams that start with the typed character or its neighbor keys
- contacts bigram
- performance measure

bug: 2873133
Change-Id: If97c005b18c82f3fafef50009dd2dfd972b0ab8f
author: Jae Yong Sung <jysung@google.com> 2010-07-26 11:43:29 -0700
committer: Jae Yong Sung <jysung@google.com> 2010-07-28 11:08:08 -0700
commit: 80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64 (patch)
tree: 384655d5c7207325014888fd26da1bc7188db66e /java/src/com
parent: 679b838b05a70ed965017635efdf536449aa230f (diff)
download: latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.tar.gz
latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.tar.xz
latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.zip
8 files changed, 386 insertions, 89 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 0a2af0662..e2c0c4ccc 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -16,6 +16,7 @@
 
 package com.android.inputmethod.latin;
 
+import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
@@ -31,6 +32,14 @@ import android.util.Log;
  */
 public class BinaryDictionary extends Dictionary {
 
+    /**
+     * There is difference between what java and native code can handle.
+     * This value should only be used in BinaryDictionary.java
+     * It is necessary to keep it at this value because some languages e.g. German have
+     * really long words.
+     */
+    protected static final int MAX_WORD_LENGTH = 48;
+
     private static final String TAG = "BinaryDictionary";
     private static final int MAX_ALTERNATIVES = 16;
     private static final int MAX_WORDS = 16;
@@ -64,8 +73,8 @@ public class BinaryDictionary extends Dictionary {
      * @param context application context for reading resources
      * @param resId the resource containing the raw binary dictionary
      */
-    public BinaryDictionary(Context context, int resId, int dicTypeId) {
-        if (resId != 0) {
+    public BinaryDictionary(Context context, int[] resId, int dicTypeId) {
+        if (resId != null && resId.length > 0 && resId[0] != 0) {
             loadDictionary(context, resId);
         }
         mDicTypeId = dicTypeId;
@@ -97,47 +106,68 @@ public class BinaryDictionary extends Dictionary {
     private native void closeNative(int dict);
     private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
     private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize, 
-            char[] outputChars, int[] frequencies,
-            int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
-            int[] nextLettersFrequencies, int nextLettersSize);
-    private native int getBigramsNative(int nativeData, char[] prevWord, int prevWordLength,
-            char[] outputChars, int[] frequencies, int maxWordLength, int maxBigrams);
-
-    private final void loadDictionary(Context context, int resId) {
-        InputStream is = context.getResources().openRawResource(resId);
+            char[] outputChars, int[] frequencies, int maxWordLength, int maxWords,
+            int maxAlternatives, int skipPos, int[] nextLettersFrequencies, int nextLettersSize);
+    private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength,
+            int[] inputCodes, int inputCodesLength, char[] outputChars, int[] frequencies,
+            int maxWordLength, int maxBigrams, int maxAlternatives);
+
+    private final void loadDictionary(Context context, int[] resId) {
+        InputStream[] is = null;
         try {
-            int avail = is.available();
+            // merging separated dictionary into one if dictionary is separated
+            int total = 0;
+            is = new InputStream[resId.length];
+            for (int i = 0; i < resId.length; i++) {
+                is[i] = context.getResources().openRawResource(resId[i]);
+                total += is[i].available();
+            }
+
             mNativeDictDirectBuffer =
-                    ByteBuffer.allocateDirect(avail).order(ByteOrder.nativeOrder());
-            int got = Channels.newChannel(is).read(mNativeDictDirectBuffer);
-            if (got != avail) {
-                Log.e(TAG, "Read " + got + " bytes, expected " + avail);
+                ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder());
+            int got = 0;
+            for (int i = 0; i < resId.length; i++) {
+                 got += Channels.newChannel(is[i]).read(mNativeDictDirectBuffer);
+            }
+            if (got != total) {
+                Log.e(TAG, "Read " + got + " bytes, expected " + total);
             } else {
                 mNativeDict = openNative(mNativeDictDirectBuffer,
                         TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER);
-                mDictLength = avail;
+                mDictLength = total;
             }
         } catch (IOException e) {
-            Log.w(TAG, "No available size for binary dictionary");
+            Log.w(TAG, "No available memory for binary dictionary");
         } finally {
             try {
-                is.close();
+                for (int i = 0;i < is.length; i++) {
+                    is[i].close();
+                }
             } catch (IOException e) {
                 Log.w(TAG, "Failed to close input stream");
             }
         }
     }
 
+
     @Override
-    public void getBigrams(final WordComposer composer, final CharSequence previousWord,
+    public void getBigrams(final WordComposer codes, final CharSequence previousWord,
             final WordCallback callback, int[] nextLettersFrequencies) {
 
         char[] chars = previousWord.toString().toCharArray();
         Arrays.fill(mOutputChars_bigrams, (char) 0);
         Arrays.fill(mFrequencies_bigrams, 0);
 
-        int count = getBigramsNative(mNativeDict, chars, chars.length, mOutputChars_bigrams,
-                mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS);
+        int codesSize = codes.size();
+        Arrays.fill(mInputCodes, -1);
+        int[] alternatives = codes.getCodesAt(0);
+        System.arraycopy(alternatives, 0, mInputCodes, 0,
+                Math.min(alternatives.length, MAX_ALTERNATIVES));
+
+        int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
+                mOutputChars_bigrams, mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS,
+                MAX_ALTERNATIVES);
+
         for (int j = 0; j < count; j++) {
             if (mFrequencies_bigrams[j] < 1) break;
             int start = j * MAX_WORD_LENGTH;
@@ -156,7 +186,7 @@ public class BinaryDictionary extends Dictionary {
     public void getWords(final WordComposer codes, final WordCallback callback,
             int[] nextLettersFrequencies) {
         final int codesSize = codes.size();
-        // Wont deal with really long words.
+        // Won't deal with really long words.
         if (codesSize > MAX_WORD_LENGTH - 1) return;
         
         Arrays.fill(mInputCodes, -1);
diff --git a/java/src/com/android/inputmethod/latin/ContactsDictionary.java b/java/src/com/android/inputmethod/latin/ContactsDictionary.java
index f5ff865c4..756782887 100644
--- a/java/src/com/android/inputmethod/latin/ContactsDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ContactsDictionary.java
@@ -22,6 +22,8 @@ import android.database.ContentObserver;
 import android.database.Cursor;
 import android.os.SystemClock;
 import android.provider.ContactsContract.Contacts;
+import android.text.TextUtils;
+import android.util.Log;
 
 public class ContactsDictionary extends ExpandableDictionary {
 
@@ -30,6 +32,12 @@ public class ContactsDictionary extends ExpandableDictionary {
         Contacts.DISPLAY_NAME,
     };
 
+    /**
+     * Frequency for contacts information into the dictionary
+     */
+    private static final int FREQUENCY_FOR_CONTACTS = 128;
+    private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
+
     private static final int INDEX_NAME = 1;
 
     private ContentObserver mObserver;
@@ -90,6 +98,7 @@ public class ContactsDictionary extends ExpandableDictionary {
 
                 if (name != null) {
                     int len = name.length();
+                    String prevWord = null;
 
                     // TODO: Better tokenization for non-Latin writing systems
                     for (int i = 0; i < len; i++) {
@@ -113,7 +122,13 @@ public class ContactsDictionary extends ExpandableDictionary {
                             // capitalization of i.
                             final int wordLen = word.length();
                             if (wordLen < maxWordLength && wordLen > 1) {
-                                super.addWord(word, 128);
+                                super.addWord(word, FREQUENCY_FOR_CONTACTS);
+                                if (!TextUtils.isEmpty(prevWord)) {
+                                    // TODO Do not add email address
+                                    super.addBigrams(prevWord, word,
+                                            FREQUENCY_FOR_CONTACTS_BIGRAM);
+                                }
+                                prevWord = word;
                             }
                         }
                     }
diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java
index a02edeee5..d04bf57a7 100644
--- a/java/src/com/android/inputmethod/latin/Dictionary.java
+++ b/java/src/com/android/inputmethod/latin/Dictionary.java
@@ -21,9 +21,6 @@ package com.android.inputmethod.latin;
  * strokes.
  */
 abstract public class Dictionary {
-
-    protected static final int MAX_WORD_LENGTH = 48;
-
     /**
      * Whether or not to replicate the typed word in the suggested list, even if it's valid.
      */
diff --git a/java/src/com/android/inputmethod/latin/EditingUtil.java b/java/src/com/android/inputmethod/latin/EditingUtil.java
index 5133c60ca..0c87f8d58 100644
--- a/java/src/com/android/inputmethod/latin/EditingUtil.java
+++ b/java/src/com/android/inputmethod/latin/EditingUtil.java
@@ -29,7 +29,7 @@ public class EditingUtil {
     /**
      * Number of characters we want to look back in order to identify the previous word
      */
-    public static final int LOOKBACK_CHARACTER_NUM = 15;
+    private static final int LOOKBACK_CHARACTER_NUM = 15;
 
     private EditingUtil() {};
 
@@ -185,10 +185,22 @@ public class EditingUtil {
 
     private static final Pattern spaceRegex = Pattern.compile("\\s+");
 
-    public static CharSequence getPreviousWord(InputConnection connection) {
+    public static CharSequence getPreviousWord(InputConnection connection,
+            String sentenceSeperators) {
         //TODO: Should fix this. This could be slow!
         CharSequence prev = connection.getTextBeforeCursor(LOOKBACK_CHARACTER_NUM, 0);
+        if (prev == null) {
+            return null;
+        }
         String[] w = spaceRegex.split(prev);
-        return (w.length >= 2) ? w[w.length-2] : null;
+        if (w.length >= 2 && w[w.length-2].length() > 0) {
+            char lastChar = w[w.length-2].charAt(w[w.length-2].length() -1);
+            if (sentenceSeperators.contains(String.valueOf(lastChar))) {
+                return null;
+            }
+            return w[w.length-2];
+        } else {
+            return null;
+        }
     }
 }
diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
index d8a9547c1..53f9ed8c8 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@@ -16,22 +16,32 @@
 
 package com.android.inputmethod.latin;
 
+import java.util.LinkedList;
+
 import android.content.Context;
 import android.os.AsyncTask;
+import android.os.SystemClock;
+import android.util.Log;
 
 /**
  * Base class for an in-memory dictionary that can grow dynamically and can
  * be searched for suggestions and valid words.
  */
 public class ExpandableDictionary extends Dictionary {
+    /**
+     * There is difference between what java and native code can handle.
+     * It uses 32 because Java stack overflows when greater value is used.
+     */
+    protected static final int MAX_WORD_LENGTH = 32;
+
     private Context mContext;
     private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
     private int mDicTypeId;
     private int mMaxDepth;
     private int mInputLength;
     private int[] mNextLettersFrequencies;
+    private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
 
-    public static final int MAX_WORD_LENGTH = 32;
     private static final char QUOTE = '\'';
 
     private boolean mRequiresReload;
@@ -45,7 +55,9 @@ public class ExpandableDictionary extends Dictionary {
         char code;
         int frequency;
         boolean terminal;
+        Node parent;
         NodeArray children;
+        LinkedList<NextWord> ngrams; // Supports ngram
     }
 
     static class NodeArray {
@@ -69,6 +81,18 @@ public class ExpandableDictionary extends Dictionary {
         }
     }
 
+    static class NextWord {
+        Node word;
+        NextWord nextWord;
+        int frequency;
+
+        NextWord(Node word, int frequency) {
+            this.word = word;
+            this.frequency = frequency;
+        }
+    }
+
+
     private NodeArray mRoots;
 
     private int[][] mCodes;
@@ -117,12 +141,11 @@ public class ExpandableDictionary extends Dictionary {
     }
 
     public void addWord(String word, int frequency) {
-        addWordRec(mRoots, word, 0, frequency);
+        addWordRec(mRoots, word, 0, frequency, null);
     }
 
-    private void addWordRec(NodeArray children, final String word,
-            final int depth, final int frequency) {
-        
+    private void addWordRec(NodeArray children, final String word, final int depth,
+            final int frequency, Node parentNode) {
         final int wordLength = word.length();
         final char c = word.charAt(depth);
         // Does children have the current character?
@@ -139,6 +162,7 @@ public class ExpandableDictionary extends Dictionary {
         if (!found) {
             childNode = new Node();
             childNode.code = c;
+            childNode.parent = parentNode;
             children.add(childNode);
         }
         if (wordLength == depth + 1) {
@@ -151,7 +175,7 @@ public class ExpandableDictionary extends Dictionary {
         if (childNode.children == null) {
             childNode.children = new NodeArray();
         }
-        addWordRec(childNode.children, word, depth + 1, frequency);
+        addWordRec(childNode.children, word, depth + 1, frequency, childNode);
     }
 
     @Override
@@ -185,7 +209,7 @@ public class ExpandableDictionary extends Dictionary {
             if (mRequiresReload) startDictionaryLoadingTaskLocked();
             if (mUpdatingDictionary) return false;
         }
-        final int freq = getWordFrequencyRec(mRoots, word, 0, word.length());
+        final int freq = getWordFrequency(word);
         return freq > -1;
     }
 
@@ -193,32 +217,8 @@ public class ExpandableDictionary extends Dictionary {
      * Returns the word's frequency or -1 if not found
      */
     public int getWordFrequency(CharSequence word) {
-        return getWordFrequencyRec(mRoots, word, 0, word.length());
-    }
-
-    /**
-     * Returns the word's frequency or -1 if not found
-     */
-    private int getWordFrequencyRec(final NodeArray children, final CharSequence word, 
-            final int offset, final int length) {
-        final int count = children.length;
-        char currentChar = word.charAt(offset);
-        for (int j = 0; j < count; j++) {
-            final Node node = children.data[j];
-            if (node.code == currentChar) {
-                if (offset == length - 1) {
-                    if (node.terminal) {
-                        return node.frequency;
-                    }
-                } else {
-                    if (node.children != null) {
-                        int freq = getWordFrequencyRec(node.children, word, offset + 1, length);
-                        if (freq > -1) return freq;
-                    }
-                }
-            }
-        }
-        return -1;
+        Node node = searchNode(mRoots, word, 0, word.length());
+        return (node == null) ? -1 : node.frequency;
     }
 
     /**
@@ -325,6 +325,133 @@ public class ExpandableDictionary extends Dictionary {
         }
     }
 
+    /**
+     * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
+     * @param addFrequency adding frequency of the pair
+     * @return returns the final frequency
+     */
+    protected int addBigrams(String word1, String word2, int addFrequency) {
+        Node firstWord = searchWord(mRoots, word1, 0, null);
+        Node secondWord = searchWord(mRoots, word2, 0, null);
+        LinkedList<NextWord> bigram = firstWord.ngrams;
+        if (bigram == null || bigram.size() == 0) {
+            firstWord.ngrams = new LinkedList<NextWord>();
+            bigram = firstWord.ngrams;
+        } else {
+            for (NextWord nw : bigram) {
+                if (nw.word == secondWord) {
+                    nw.frequency += addFrequency;
+                    return nw.frequency;
+                }
+            }
+        }
+        NextWord nw = new NextWord(secondWord, addFrequency);
+        firstWord.ngrams.add(nw);
+        return addFrequency;
+    }
+
+    /**
+     * Searches for the word and add the word if it does not exist.
+     * @return Returns the terminal node of the word we are searching for.
+     */
+    private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
+        final int wordLength = word.length();
+        final char c = word.charAt(depth);
+        // Does children have the current character?
+        final int childrenLength = children.length;
+        Node childNode = null;
+        boolean found = false;
+        for (int i = 0; i < childrenLength; i++) {
+            childNode = children.data[i];
+            if (childNode.code == c) {
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            childNode = new Node();
+            childNode.code = c;
+            childNode.parent = parentNode;
+            children.add(childNode);
+        }
+        if (wordLength == depth + 1) {
+            // Terminate this word
+            childNode.terminal = true;
+            return childNode;
+        }
+        if (childNode.children == null) {
+            childNode.children = new NodeArray();
+        }
+        return searchWord(childNode.children, word, depth + 1, childNode);
+    }
+
+    @Override
+    public void getBigrams(final WordComposer codes, final CharSequence previousWord,
+            final WordCallback callback, int[] nextLettersFrequencies) {
+        synchronized (mUpdatingLock) {
+            // If we need to update, start off a background task
+            if (mRequiresReload) startDictionaryLoadingTaskLocked();
+            // Currently updating contacts, don't return any results.
+            if (mUpdatingDictionary) return;
+        }
+
+        Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length());
+        if (prevWord != null && prevWord.ngrams != null) {
+            reverseLookUp(prevWord.ngrams, callback);
+        }
+    }
+
+    /**
+     * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
+     * through callback.
+     * @param terminalNodes list of terminal nodes we want to add
+     */
+    private void reverseLookUp(LinkedList<NextWord> terminalNodes,
+            final WordCallback callback) {
+        Node node;
+        int freq;
+        for (NextWord nextWord : terminalNodes) {
+            node = nextWord.word;
+            freq = nextWord.frequency;
+            sb.setLength(0);
+            do {
+                sb.insert(0, node.code);
+                node = node.parent;
+            } while(node != null);
+
+            // TODO better way to feed char array?
+            callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
+                    DataType.BIGRAM);
+        }
+    }
+
+    /**
+     * Search for the terminal node of the word
+     * @return Returns the terminal node of the word if the word exists
+     */
+    private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
+            final int length) {
+        // TODO Consider combining with addWordRec
+        final int count = children.length;
+        char currentChar = word.charAt(offset);
+        for (int j = 0; j < count; j++) {
+            final Node node = children.data[j];
+            if (node.code == currentChar) {
+                if (offset == length - 1) {
+                    if (node.terminal) {
+                        return node;
+                    }
+                } else {
+                    if (node.children != null) {
+                        Node returnNode = searchNode(node.children, word, offset + 1, length);
+                        if (returnNode != null) return returnNode;
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
     protected void clearDictionary() {
         mRoots = new NodeArray();
     }
diff --git a/java/src/com/android/inputmethod/latin/InputLanguageSelection.java b/java/src/com/android/inputmethod/latin/InputLanguageSelection.java
index 718fda18d..923dce359 100644
--- a/java/src/com/android/inputmethod/latin/InputLanguageSelection.java
+++ b/java/src/com/android/inputmethod/latin/InputLanguageSelection.java
@@ -99,7 +99,10 @@ public class InputLanguageSelection extends PreferenceActivity {
         boolean haveDictionary = false;
         conf.locale = locale;
         res.updateConfiguration(conf, res.getDisplayMetrics());
-        BinaryDictionary bd = new BinaryDictionary(this, R.raw.main, Suggest.DIC_MAIN);
+
+        int[] dictionaries = LatinIME.getDictionary(res, this.getPackageName());
+        BinaryDictionary bd = new BinaryDictionary(this, dictionaries, Suggest.DIC_MAIN);
+
         // Is the dictionary larger than a placeholder? Arbitrarily chose a lower limit of
         // 4000-5000 words, whereas the LARGE_DICTIONARY is about 20000+ words.
         if (bd.getSize() > Suggest.LARGE_DICTIONARY_THRESHOLD / 4) {
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index f5d13ac05..bbca316a4 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -20,6 +20,8 @@ import com.android.inputmethod.voice.FieldContext;
 import com.android.inputmethod.voice.SettingsUtil;
 import com.android.inputmethod.voice.VoiceInput;
 
+import org.xmlpull.v1.XmlPullParserException;
+
 import android.app.AlertDialog;
 import android.content.BroadcastReceiver;
 import android.content.Context;
@@ -29,6 +31,7 @@ import android.content.IntentFilter;
 import android.content.SharedPreferences;
 import android.content.res.Configuration;
 import android.content.res.Resources;
+import android.content.res.XmlResourceParser;
 import android.inputmethodservice.InputMethodService;
 import android.inputmethodservice.Keyboard;
 import android.media.AudioManager;
@@ -60,6 +63,7 @@ import android.view.inputmethod.InputConnection;
 import android.view.inputmethod.InputMethodManager;
 
 import java.io.FileDescriptor;
+import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -159,6 +163,8 @@ public class LatinIME extends InputMethodService
     KeyboardSwitcher mKeyboardSwitcher;
 
     private UserDictionary mUserDictionary;
+    // User Bigram is disabled for now
+    //private UserBigramDictionary mUserBigramDictionary;
     private ContactsDictionary mContactsDictionary;
     private AutoDictionary mAutoDictionary;
 
@@ -383,6 +389,45 @@ public class LatinIME extends InputMethodService
         prefs.registerOnSharedPreferenceChangeListener(this);
     }
 
+    /**
+     * Loads a dictionary or multiple separated dictionary
+     * @return returns array of dictionary resource ids
+     */
+    static int[] getDictionary(Resources res, String packageName) {
+        XmlResourceParser xrp = res.getXml(R.xml.dictionary);
+        int dictionaryCount = 0;
+        ArrayList<Integer> dictionaries = new ArrayList<Integer>();
+
+        try {
+            int current = xrp.getEventType();
+            while (current != XmlResourceParser.END_DOCUMENT) {
+                if (current == XmlResourceParser.START_TAG) {
+                    String tag = xrp.getName();
+                    if (tag != null) {
+                        if (tag.equals("part")) {
+                            String dictFileName = xrp.getAttributeValue(null, "name");
+                            dictionaries.add(res.getIdentifier(dictFileName, "raw", packageName));
+                        }
+                    }
+                }
+                xrp.next();
+                current = xrp.getEventType();
+            }
+        } catch (XmlPullParserException e) {
+            Log.e(TAG, "Dictionary XML parsing failure");
+        } catch (IOException e) {
+            Log.e(TAG, "Dictionary XML IOException");
+        }
+
+        int count = dictionaries.size();
+        int[] dict = new int[count];
+        for (int i = 0; i < count; i++) {
+            dict[i] = dictionaries.get(i);
+        }
+
+        return dict;
+    }
+
     private void initSuggest(String locale) {
         mInputLocale = locale;
 
@@ -396,7 +441,9 @@ public class LatinIME extends InputMethodService
         }
         SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(this);
         mQuickFixes = sp.getBoolean(PREF_QUICK_FIXES, true);
-        mSuggest = new Suggest(this, R.raw.main);
+
+        int[] dictionaries = getDictionary(orig, this.getPackageName());
+        mSuggest = new Suggest(this, dictionaries);
         updateAutoTextEnabled(saveLocale);
         if (mUserDictionary != null) mUserDictionary.close();
         mUserDictionary = new UserDictionary(this, mInputLocale);
@@ -407,6 +454,15 @@ public class LatinIME extends InputMethodService
             mAutoDictionary.close();
         }
         mAutoDictionary = new AutoDictionary(this, this, mInputLocale, Suggest.DIC_AUTO);
+        // User Bigram is disabled for now
+        /*
+        if (mUserBigramDictionary != null) {
+            mUserBigramDictionary.close();
+        }
+        mUserBigramDictionary = new UserBigramDictionary(this, this, mInputLocale,
+                Suggest.DIC_USERBIGRAM);
+        mSuggest.setUserBigramDictionary(mUserBigramDictionary);
+        */
         mSuggest.setUserDictionary(mUserDictionary);
         mSuggest.setContactsDictionary(mContactsDictionary);
         mSuggest.setAutoDictionary(mAutoDictionary);
@@ -642,6 +698,8 @@ public class LatinIME extends InputMethodService
             mKeyboardSwitcher.getInputView().closing();
         }
         if (mAutoDictionary != null) mAutoDictionary.flushPendingWrites();
+        // User Bigram is disabled for now
+        //if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites();
     }
 
     @Override
@@ -897,7 +955,7 @@ public class LatinIME extends InputMethodService
                 }
                 mCommittedLength = mComposing.length();
                 TextEntryState.acceptedTyped(mComposing);
-                checkAddToDictionary(mComposing, AutoDictionary.FREQUENCY_FOR_TYPED);
+                addToDictionaries(mComposing, AutoDictionary.FREQUENCY_FOR_TYPED);
             }
             updateSuggestions();
         }
@@ -1583,9 +1641,10 @@ public class LatinIME extends InputMethodService
     private void showSuggestions(WordComposer word) {
         //long startTime = System.currentTimeMillis(); // TIME MEASUREMENT!
         // TODO Maybe need better way of retrieving previous word
-        CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
+        CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection(),
+                mWordSeparators);
         List<CharSequence> stringList = mSuggest.getSuggestions(
-                mKeyboardSwitcher.getInputView(), word, false, prevWord);
+            mKeyboardSwitcher.getInputView(), word, false, prevWord);
         //long stopTime = System.currentTimeMillis(); // TIME MEASUREMENT!
         //Log.d("LatinIME","Suggest Total Time - " + (stopTime - startTime));
 
@@ -1601,7 +1660,8 @@ public class LatinIME extends InputMethodService
         boolean typedWordValid = mSuggest.isValidWord(typedWord) ||
                 (preferCapitalization()
                         && mSuggest.isValidWord(typedWord.toString().toLowerCase()));
-        if (mCorrectionMode == Suggest.CORRECTION_FULL) {
+        if (mCorrectionMode == Suggest.CORRECTION_FULL
+                || mCorrectionMode == Suggest.CORRECTION_FULL_BIGRAM) {
             correctionAvailable |= typedWordValid;
         }
         // Don't auto-correct words with multiple capital letter
@@ -1637,8 +1697,9 @@ public class LatinIME extends InputMethodService
             mJustAccepted = true;
             pickSuggestion(mBestWord, false);
             // Add the word to the auto dictionary if it's not a known word
-            checkAddToDictionary(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED);
+            addToDictionaries(mBestWord, AutoDictionary.FREQUENCY_FOR_TYPED);
             return true;
+
         }
         return false;
     }
@@ -1692,7 +1753,9 @@ public class LatinIME extends InputMethodService
         pickSuggestion(suggestion, correcting);
         // Add the word to the auto dictionary if it's not a known word
         if (index == 0) {
-            checkAddToDictionary(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED);
+            addToDictionaries(suggestion, AutoDictionary.FREQUENCY_FOR_PICKED);
+        } else {
+            addToBigramDictionary(suggestion, 1);
         }
         LatinImeLogger.logOnManualSuggestion(mComposing.toString(), suggestion.toString(),
                 index, suggestions);
@@ -1892,16 +1955,43 @@ public class LatinIME extends InputMethodService
         ic.setSelection(mLastSelectionStart, mLastSelectionStart);
     }
 
-    private void checkAddToDictionary(CharSequence suggestion, int frequencyDelta) {
+    private void addToDictionaries(CharSequence suggestion, int frequencyDelta) {
+        checkAddToDictionary(suggestion, frequencyDelta, false);
+    }
+
+    private void addToBigramDictionary(CharSequence suggestion, int frequencyDelta) {
+        checkAddToDictionary(suggestion, frequencyDelta, true);
+    }
+
+    /**
+     * Adds the word to the AutoDictionary; the UserBigramDictionary update is disabled for now
+     * @param addToBigramDictionary true to add the word to the bigram dictionary when possible
+     */
+    private void checkAddToDictionary(CharSequence suggestion, int frequencyDelta,
+            boolean addToBigramDictionary) {
         if (suggestion == null || suggestion.length() < 1) return;
         // Only auto-add to dictionary if auto-correct is ON. Otherwise we'll be
         // adding words in situations where the user or application really didn't
         // want corrections enabled or learned.
-        if (!(mCorrectionMode == Suggest.CORRECTION_FULL)) return;
-        if (suggestion != null && mAutoDictionary.isValidWord(suggestion)
-                || (!mSuggest.isValidWord(suggestion.toString())
+        if (!(mCorrectionMode == Suggest.CORRECTION_FULL
+                || mCorrectionMode == Suggest.CORRECTION_FULL_BIGRAM)) {
+            return;
+        }
+        if (suggestion != null) {
+            if (!addToBigramDictionary && mAutoDictionary.isValidWord(suggestion)
+                    || (!mSuggest.isValidWord(suggestion.toString())
                     && !mSuggest.isValidWord(suggestion.toString().toLowerCase()))) {
-            mAutoDictionary.addWord(suggestion.toString(), frequencyDelta);
+                mAutoDictionary.addWord(suggestion.toString(), frequencyDelta);
+            }
+            // User Bigram is disabled for now
+            /*
+            if (mUserBigramDictionary != null) {
+                CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
+                if (!TextUtils.isEmpty(prevWord)) {
+                    mUserBigramDictionary.addBigrams(prevWord.toString(), suggestion.toString(), 1);
+                }
+            }
+            */
         }
     }
 
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index b90547483..cfb691021 100755
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -34,6 +34,10 @@ import android.view.View;
  */
 public class Suggest implements Dictionary.WordCallback {
 
+    private static final String TAG = "Suggest";
+
+    public static final int APPROX_MAX_WORD_LENGTH = 32;
+
     public static final int CORRECTION_NONE = 0;
     public static final int CORRECTION_BASIC = 1;
     public static final int CORRECTION_FULL = 2;
@@ -71,6 +75,8 @@ public class Suggest implements Dictionary.WordCallback {
 
     private Dictionary mContactsDictionary;
 
+    private Dictionary mUserBigramDictionary;
+
     private int mPrefMaxSuggestions = 12;
     private int mPrefMaxBigrams = 255;
 
@@ -95,7 +101,7 @@ public class Suggest implements Dictionary.WordCallback {
 
     private int mCorrectionMode = CORRECTION_BASIC;
 
-    public Suggest(Context context, int dictionaryResId) {
+    public Suggest(Context context, int[] dictionaryResId) {
         mMainDict = new BinaryDictionary(context, dictionaryResId, DIC_MAIN);
         initPool();
     }
@@ -107,7 +113,7 @@ public class Suggest implements Dictionary.WordCallback {
 
     private void initPool() {
         for (int i = 0; i < mPrefMaxSuggestions; i++) {
-            StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
+            StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
             mStringPool.add(sb);
         }
     }
@@ -128,6 +134,10 @@ public class Suggest implements Dictionary.WordCallback {
         return mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
     }
 
+    public int getApproxMaxWordLength() {
+        return APPROX_MAX_WORD_LENGTH;
+    }
+
     /**
      * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
      * before the main dictionary, if set.
@@ -147,6 +157,10 @@ public class Suggest implements Dictionary.WordCallback {
         mAutoDictionary = autoDictionary;
     }
 
+    public void setUserBigramDictionary(Dictionary userBigramDictionary) {
+        mUserBigramDictionary = userBigramDictionary;
+    }
+
     /**
      * Number of suggestions to generate from the input key sequence. This has
      * to be a number between 1 and 100 (inclusive).
@@ -162,7 +176,7 @@ public class Suggest implements Dictionary.WordCallback {
         mBigramPriorities = new int[mPrefMaxBigrams];
         collectGarbage(mSuggestions, mPrefMaxSuggestions);
         while (mStringPool.size() < mPrefMaxSuggestions) {
-            StringBuilder sb = new StringBuilder(Dictionary.MAX_WORD_LENGTH);
+            StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
             mStringPool.add(sb);
         }
     }
@@ -224,10 +238,9 @@ public class Suggest implements Dictionary.WordCallback {
             mLowerOriginalWord = "";
         }
 
-        // Search the dictionary only if there are at least 2 characters
         if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM
                 || mCorrectionMode == CORRECTION_BASIC)) {
-            // At first character, just get the bigrams
+            // At first character typed, search only the bigrams
             Arrays.fill(mBigramPriorities, 0);
             collectGarbage(mBigramSuggestions, mPrefMaxBigrams);
 
@@ -236,17 +249,29 @@ public class Suggest implements Dictionary.WordCallback {
                 if (mMainDict.isValidWord(lowerPrevWord)) {
                     prevWordForBigram = lowerPrevWord;
                 }
-                mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
-                        mNextLettersFrequencies);
+                if (mUserBigramDictionary != null) {
+                    mUserBigramDictionary.getBigrams(wordComposer, prevWordForBigram, this,
+                            mNextLettersFrequencies);
+                }
+                if (mContactsDictionary != null) {
+                    mContactsDictionary.getBigrams(wordComposer, prevWordForBigram, this,
+                            mNextLettersFrequencies);
+                }
+                if (mMainDict != null) {
+                    mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
+                            mNextLettersFrequencies);
+                }
                 char currentChar = wordComposer.getTypedWord().charAt(0);
+                char currentCharUpper = Character.toUpperCase(currentChar);
                 int count = 0;
                 int bigramSuggestionSize = mBigramSuggestions.size();
                 for (int i = 0; i < bigramSuggestionSize; i++) {
-                    if (mBigramSuggestions.get(i).charAt(0) == currentChar) {
+                    if (mBigramSuggestions.get(i).charAt(0) == currentChar
+                            || mBigramSuggestions.get(i).charAt(0) == currentCharUpper) {
                         int poolSize = mStringPool.size();
                         StringBuilder sb = poolSize > 0 ?
                                 (StringBuilder) mStringPool.remove(poolSize - 1)
-                                : new StringBuilder(Dictionary.MAX_WORD_LENGTH);
+                                : new StringBuilder(getApproxMaxWordLength());
                         sb.setLength(0);
                         sb.append(mBigramSuggestions.get(i));
                         mSuggestions.add(count++, sb);
@@ -256,7 +281,7 @@ public class Suggest implements Dictionary.WordCallback {
             }
 
         } else if (wordComposer.size() > 1) {
-            // Search the dictionary only if there are at least 2 characters
+            // With two or more characters typed, search the unigrams (scores affected by bigrams)
             if (mUserDictionary != null || mContactsDictionary != null) {
                 if (mUserDictionary != null) {
                     mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
@@ -277,7 +302,6 @@ public class Suggest implements Dictionary.WordCallback {
                 mHaveCorrection = true;
             }
         }
-
         if (mOriginalWord != null) {
             mSuggestions.add(0, mOriginalWord.toString());
         }
@@ -290,7 +314,6 @@ public class Suggest implements Dictionary.WordCallback {
                 mHaveCorrection = false;
             }
         }
-
         if (mAutoTextEnabled) {
             int i = 0;
             int max = 6;
@@ -401,7 +424,7 @@ public class Suggest implements Dictionary.WordCallback {
                             / MAXIMUM_BIGRAM_FREQUENCY)
                             * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
                             + BIGRAM_MULTIPLIER_MIN;
-                    /* Log.d("Suggest","bigram num: " + bigramSuggestion
+                    /* Log.d(TAG,"bigram num: " + bigramSuggestion
                             + "  wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
                             + "  currentPriority: " + freq + "  bigramPriority: "
                             + mBigramPriorities[bigramSuggestion]
@@ -430,7 +453,7 @@ public class Suggest implements Dictionary.WordCallback {
         priorities[pos] = freq;
         int poolSize = mStringPool.size();
         StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) 
-                : new StringBuilder(Dictionary.MAX_WORD_LENGTH);
+                : new StringBuilder(getApproxMaxWordLength());
         sb.setLength(0);
         if (mCapitalize) {
             sb.append(Character.toUpperCase(word[offset]));
author	Jae Yong Sung <jysung@google.com>	2010-07-26 11:43:29 -0700
committer	Jae Yong Sung <jysung@google.com>	2010-07-28 11:08:08 -0700
commit	80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64 (patch)
tree	384655d5c7207325014888fd26da1bc7188db66e /java/src/com
parent	679b838b05a70ed965017635efdf536449aa230f (diff)
download	latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.tar.gz latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.tar.xz latinime-80aa14fd432cf7d2c67f2fcfcc57c80f29f8eb64.zip