1 files changed, 163 insertions, 146 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index ced355bb2..0de474e59 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -22,8 +22,13 @@ import android.text.TextUtils;
 import android.util.Log;
 import android.view.View;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
 
 /**
  * This class loads a dictionary and provides a list of suggestions for a given sequence of
@@ -62,40 +67,37 @@ public class Suggest implements Dictionary.WordCallback {
     // If you add a type of dictionary, increment DIC_TYPE_LAST_ID
     public static final int DIC_TYPE_LAST_ID = 4;
 
-    static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
-
-    private static boolean DBG = LatinImeLogger.sDBG;
-
-    private BinaryDictionary mMainDict;
+    public static final String DICT_KEY_MAIN = "main";
+    public static final String DICT_KEY_CONTACTS = "contacts";
+    public static final String DICT_KEY_AUTO = "auto";
+    public static final String DICT_KEY_USER = "user";
+    public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
+    public static final String DICT_KEY_WHITELIST ="whitelist";
 
-    private Dictionary mUserDictionary;
+    static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
 
-    private Dictionary mAutoDictionary;
+    private static final boolean DBG = LatinImeLogger.sDBG;
 
-    private Dictionary mContactsDictionary;
+    private AutoCorrection mAutoCorrection;
 
-    private Dictionary mUserBigramDictionary;
+    private BinaryDictionary mMainDict;
+    private WhitelistDictionary mWhiteListDictionary;
+    private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>();
+    private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>();
 
     private int mPrefMaxSuggestions = 12;
 
     private static final int PREF_MAX_BIGRAMS = 60;
 
-    private boolean mAutoTextEnabled;
+    private boolean mQuickFixesEnabled;
 
     private double mAutoCorrectionThreshold;
     private int[] mPriorities = new int[mPrefMaxSuggestions];
     private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS];
 
-    // Handle predictive correction for only the first 1280 characters for performance reasons
-    // If we support scripts that need latin characters beyond that, we should probably use some
-    // kind of a sparse array or language specific list with a mapping lookup table.
-    // 1280 is the size of the BASE_CHARS array in ExpandableDictionary, which is a basic set of
-    // latin characters.
-    private int[] mNextLettersFrequencies = new int[1280];
     private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
     ArrayList<CharSequence> mBigramSuggestions  = new ArrayList<CharSequence>();
     private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
-    private boolean mHasAutoCorrection;
     private String mLowerOriginalWord;
 
     // TODO: Remove these member variables by passing more context to addWord() callback method
@@ -105,7 +107,24 @@ public class Suggest implements Dictionary.WordCallback {
     private int mCorrectionMode = CORRECTION_BASIC;
 
     public Suggest(Context context, int dictionaryResId) {
-        mMainDict = BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN);
+        init(context, BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN));
+    }
+
+    /* package for test */ Suggest(File dictionary, long startOffset, long length) {
+        init(null, BinaryDictionary.initDictionary(dictionary, startOffset, length, DIC_MAIN));
+    }
+
+    private void init(Context context, BinaryDictionary mainDict) {
+        if (mainDict != null) {
+            mMainDict = mainDict;
+            mUnigramDictionaries.put(DICT_KEY_MAIN, mainDict);
+            mBigramDictionaries.put(DICT_KEY_MAIN, mainDict);
+        }
+        mWhiteListDictionary = WhitelistDictionary.init(context);
+        if (mWhiteListDictionary != null) {
+            mUnigramDictionaries.put(DICT_KEY_WHITELIST, mWhiteListDictionary);
+        }
+        mAutoCorrection = new AutoCorrection();
         initPool();
     }
 
@@ -116,8 +135,8 @@ public class Suggest implements Dictionary.WordCallback {
         }
     }
 
-    public void setAutoTextEnabled(boolean enabled) {
-        mAutoTextEnabled = enabled;
+    public void setQuickFixesEnabled(boolean enabled) {
+        mQuickFixesEnabled = enabled;
     }
 
     public int getCorrectionMode() {
@@ -132,6 +151,10 @@ public class Suggest implements Dictionary.WordCallback {
         return mMainDict != null && mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
     }
 
+    public Map<String, Dictionary> getUnigramDictionaries() {
+        return mUnigramDictionaries;
+    }
+
     public int getApproxMaxWordLength() {
         return APPROX_MAX_WORD_LENGTH;
     }
@@ -141,22 +164,28 @@ public class Suggest implements Dictionary.WordCallback {
      * before the main dictionary, if set.
      */
     public void setUserDictionary(Dictionary userDictionary) {
-        mUserDictionary = userDictionary;
+        if (userDictionary != null)
+            mUnigramDictionaries.put(DICT_KEY_USER, userDictionary);
     }
 
     /**
      * Sets an optional contacts dictionary resource to be loaded.
      */
-    public void setContactsDictionary(Dictionary userDictionary) {
-        mContactsDictionary = userDictionary;
+    public void setContactsDictionary(Dictionary contactsDictionary) {
+        if (contactsDictionary != null) {
+            mUnigramDictionaries.put(DICT_KEY_CONTACTS, contactsDictionary);
+            mBigramDictionaries.put(DICT_KEY_CONTACTS, contactsDictionary);
+        }
     }
 
     public void setAutoDictionary(Dictionary autoDictionary) {
-        mAutoDictionary = autoDictionary;
+        if (autoDictionary != null)
+            mUnigramDictionaries.put(DICT_KEY_AUTO, autoDictionary);
     }
 
     public void setUserBigramDictionary(Dictionary userBigramDictionary) {
-        mUserBigramDictionary = userBigramDictionary;
+        if (userBigramDictionary != null)
+            mBigramDictionaries.put(DICT_KEY_USER_BIGRAM, userBigramDictionary);
     }
 
     public void setAutoCorrectionThreshold(double threshold) {
@@ -200,16 +229,34 @@ public class Suggest implements Dictionary.WordCallback {
         return getSuggestedWordBuilder(view, wordComposer, prevWordForBigram).build();
     }
 
+    private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) {
+        if (TextUtils.isEmpty(word) || !(all || first)) return word;
+        final int wordLength = word.length();
+        final int poolSize = mStringPool.size();
+        final StringBuilder sb =
+                poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
+                        : new StringBuilder(getApproxMaxWordLength());
+        sb.setLength(0);
+        if (all) {
+            sb.append(word.toString().toUpperCase());
+        } else if (first) {
+            sb.append(Character.toUpperCase(word.charAt(0)));
+            if (wordLength > 1) {
+                sb.append(word.subSequence(1, wordLength));
+            }
+        }
+        return sb;
+    }
+
     // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
     public SuggestedWords.Builder getSuggestedWordBuilder(View view, WordComposer wordComposer,
             CharSequence prevWordForBigram) {
         LatinImeLogger.onStartSuggestion(prevWordForBigram);
-        mHasAutoCorrection = false;
+        mAutoCorrection.init();
         mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
         mIsAllUpperCase = wordComposer.isAllUpperCase();
         collectGarbage(mSuggestions, mPrefMaxSuggestions);
         Arrays.fill(mPriorities, 0);
-        Arrays.fill(mNextLettersFrequencies, 0);
 
         // Save a lowercase version of the original word
         CharSequence typedWord = wordComposer.getTypedWord();
@@ -235,17 +282,8 @@ public class Suggest implements Dictionary.WordCallback {
                 if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
                     prevWordForBigram = lowerPrevWord;
                 }
-                if (mUserBigramDictionary != null) {
-                    mUserBigramDictionary.getBigrams(wordComposer, prevWordForBigram, this,
-                            mNextLettersFrequencies);
-                }
-                if (mContactsDictionary != null) {
-                    mContactsDictionary.getBigrams(wordComposer, prevWordForBigram, this,
-                            mNextLettersFrequencies);
-                }
-                if (mMainDict != null) {
-                    mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
-                            mNextLettersFrequencies);
+                for (final Dictionary dictionary : mBigramDictionaries.values()) {
+                    dictionary.getBigrams(wordComposer, prevWordForBigram, this);
                 }
                 char currentChar = wordComposer.getTypedWord().charAt(0);
                 char currentCharUpper = Character.toUpperCase(currentChar);
@@ -268,97 +306,86 @@ public class Suggest implements Dictionary.WordCallback {
 
         } else if (wordComposer.size() > 1) {
             // At second character typed, search the unigrams (scores being affected by bigrams)
-            if (mUserDictionary != null || mContactsDictionary != null) {
-                if (mUserDictionary != null) {
-                    mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
-                }
-                if (mContactsDictionary != null) {
-                    mContactsDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
-                }
-
-                if (mSuggestions.size() > 0 && isValidWord(typedWord)
-                        && (mCorrectionMode == CORRECTION_FULL
-                        || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
-                    if (DBG) {
-                        Log.d(TAG, "Auto corrected by CORRECTION_FULL.");
-                    }
-                    mHasAutoCorrection = true;
-                }
-            }
-            if (mMainDict != null) mMainDict.getWords(wordComposer, this, mNextLettersFrequencies);
-            if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM)
-                    && mSuggestions.size() > 0 && mPriorities.length > 0) {
-                // TODO: when the normalized score of the first suggestion is nearly equals to
-                //       the normalized score of the second suggestion, behave less aggressive.
-                final double normalizedScore = Utils.calcNormalizedScore(
-                        typedWord, mSuggestions.get(0), mPriorities[0]);
-                if (LatinImeLogger.sDBG) {
-                    Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
-                            + mPriorities[0] + ", " + normalizedScore
-                            + "(" + mAutoCorrectionThreshold + ")");
-                }
-                if (normalizedScore >= mAutoCorrectionThreshold) {
-                    if (DBG) {
-                        Log.d(TAG, "Auto corrected by S-threthhold.");
-                    }
-                    mHasAutoCorrection = true;
-                }
+            for (final String key : mUnigramDictionaries.keySet()) {
+                // Skip AutoDictionary and WhitelistDictionary when looking up words
+                if (key.equals(DICT_KEY_AUTO) || key.equals(DICT_KEY_WHITELIST))
+                    continue;
+                final Dictionary dictionary = mUnigramDictionaries.get(key);
+                dictionary.getWords(wordComposer, this);
             }
         }
+        CharSequence autoText = null;
+        final String typedWordString = typedWord == null ? null : typedWord.toString();
         if (typedWord != null) {
-            mSuggestions.add(0, typedWord.toString());
-        }
-        if (mAutoTextEnabled) {
-            int i = 0;
-            int max = 6;
-            // Don't autotext the suggestions from the dictionaries
-            if (mCorrectionMode == CORRECTION_BASIC) max = 1;
-            while (i < mSuggestions.size() && i < max) {
-                String suggestedWord = mSuggestions.get(i).toString().toLowerCase();
-                CharSequence autoText =
-                        AutoText.get(suggestedWord, 0, suggestedWord.length(), view);
+            // Apply quick fix only for the typed word.
+            if (mQuickFixesEnabled) {
+                final String lowerCaseTypedWord = typedWordString.toLowerCase();
+                CharSequence tempAutoText = capitalizeWord(
+                        mIsAllUpperCase, mIsFirstCharCapitalized, AutoText.get(
+                                lowerCaseTypedWord, 0, lowerCaseTypedWord.length(), view));
+                // TODO: cleanup canAdd
                 // Is there an AutoText (also known as Quick Fixes) correction?
-                boolean canAdd = autoText != null;
                 // Capitalize as needed
-                final int autoTextLength = autoText != null ? autoText.length() : 0;
-                if (autoTextLength > 0 && (mIsAllUpperCase || mIsFirstCharCapitalized)) {
-                    int poolSize = mStringPool.size();
-                    StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(
-                            poolSize - 1) : new StringBuilder(getApproxMaxWordLength());
-                    sb.setLength(0);
-                    if (mIsAllUpperCase) {
-                        sb.append(autoText.toString().toUpperCase());
-                    } else if (mIsFirstCharCapitalized) {
-                        sb.append(Character.toUpperCase(autoText.charAt(0)));
-                        if (autoTextLength > 1) {
-                            sb.append(autoText.subSequence(1, autoTextLength));
-                        }
-                    }
-                    autoText = sb.toString();
-                }
+                boolean canAdd = tempAutoText != null;
                 // Is that correction already the current prediction (or original word)?
-                canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i));
+                canAdd &= !TextUtils.equals(tempAutoText, typedWord);
                 // Is that correction already the next predicted word?
-                if (canAdd && i + 1 < mSuggestions.size() && mCorrectionMode != CORRECTION_BASIC) {
-                    canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i + 1));
+                if (canAdd && mSuggestions.size() > 0 && mCorrectionMode != CORRECTION_BASIC) {
+                    canAdd &= !TextUtils.equals(tempAutoText, mSuggestions.get(0));
                 }
                 if (canAdd) {
                     if (DBG) {
                         Log.d(TAG, "Auto corrected by AUTOTEXT.");
                     }
-                    mHasAutoCorrection = true;
-                    mSuggestions.add(i + 1, autoText);
-                    i++;
+                    autoText = tempAutoText;
                 }
-                i++;
             }
         }
+
+        CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
+                mWhiteListDictionary.getWhiteListedWord(typedWordString));
+
+        mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries, wordComposer,
+                mSuggestions, mPriorities, typedWord, mAutoCorrectionThreshold, mCorrectionMode,
+                autoText, whitelistedWord);
+
+        if (autoText != null) {
+            mSuggestions.add(0, autoText);
+        }
+
+        if (whitelistedWord != null) {
+            mSuggestions.add(0, whitelistedWord);
+        }
+
+        if (typedWord != null) {
+            mSuggestions.add(0, typedWordString);
+        }
         removeDupes();
-        return new SuggestedWords.Builder().addWords(mSuggestions, null);
-    }
 
-    public int[] getNextLettersFrequencies() {
-        return mNextLettersFrequencies;
+        if (DBG) {
+            double normalizedScore = mAutoCorrection.getNormalizedScore();
+            ArrayList<SuggestedWords.SuggestedWordInfo> frequencyInfoList =
+                    new ArrayList<SuggestedWords.SuggestedWordInfo>();
+            frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
+            final int priorityLength = mPriorities.length;
+            for (int i = 0; i < priorityLength; ++i) {
+                if (normalizedScore > 0) {
+                    final String priorityThreshold = Integer.toString(mPriorities[i]) + " (" +
+                            normalizedScore + ")";
+                    frequencyInfoList.add(
+                            new SuggestedWords.SuggestedWordInfo(priorityThreshold, false));
+                    normalizedScore = 0.0;
+                } else {
+                    final String priority = Integer.toString(mPriorities[i]);
+                    frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo(priority, false));
+                }
+            }
+            for (int i = priorityLength; i < mSuggestions.size(); ++i) {
+                frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
+            }
+            return new SuggestedWords.Builder().addWords(mSuggestions, frequencyInfoList);
+        }
+        return new SuggestedWords.Builder().addWords(mSuggestions, null);
     }
 
     private void removeDupes() {
@@ -389,15 +416,15 @@ public class Suggest implements Dictionary.WordCallback {
     }
 
     public boolean hasAutoCorrection() {
-        return mHasAutoCorrection;
+        return mAutoCorrection.hasAutoCorrection();
     }
 
-    private boolean compareCaseInsensitive(final String mLowerOriginalWord,
+    private static boolean compareCaseInsensitive(final String lowerOriginalWord,
             final char[] word, final int offset, final int length) {
-        final int originalLength = mLowerOriginalWord.length();
+        final int originalLength = lowerOriginalWord.length();
         if (originalLength == length && Character.isUpperCase(word[offset])) {
             for (int i = 0; i < originalLength; i++) {
-                if (mLowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
+                if (lowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
                     return false;
                 }
             }
@@ -427,7 +454,20 @@ public class Suggest implements Dictionary.WordCallback {
 
         // Check if it's the same word, only caps are different
         if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
-            pos = 0;
+            // TODO: remove this surrounding if clause and move this logic to
+            // getSuggestedWordBuilder.
+            if (suggestions.size() > 0) {
+                final String currentHighestWordLowerCase =
+                        suggestions.get(0).toString().toLowerCase();
+                // If the current highest word is also equal to the typed word, we need to compare
+                // frequencies to determine the insertion position. This does not ensure strictly
+                // correct ordering, but it ensures the top score is on top, which is enough for
+                // removing duplicates correctly.
+                if (compareCaseInsensitive(currentHighestWordLowerCase, word, offset, length)
+                        && freq <= priorities[0]) {
+                    pos = 1;
+                }
+            }
         } else {
             if (dataType == Dictionary.DataType.UNIGRAM) {
                 // Check if the word was already added before (by bigram data)
@@ -510,16 +550,6 @@ public class Suggest implements Dictionary.WordCallback {
         return -1;
     }
 
-    public boolean isValidWord(final CharSequence word) {
-        if (word == null || word.length() == 0 || mMainDict == null) {
-            return false;
-        }
-        return mMainDict.isValidWord(word)
-                || (mUserDictionary != null && mUserDictionary.isValidWord(word))
-                || (mAutoDictionary != null && mAutoDictionary.isValidWord(word))
-                || (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
-    }
-
     private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) {
         int poolSize = mStringPool.size();
         int garbageSize = suggestions.size();
@@ -538,25 +568,12 @@ public class Suggest implements Dictionary.WordCallback {
     }
 
     public void close() {
-        if (mMainDict != null) {
-            mMainDict.close();
-            mMainDict = null;
-        }
-        if (mUserDictionary != null) {
-            mUserDictionary.close();
-            mUserDictionary = null;
-        }
-        if (mUserBigramDictionary != null) {
-            mUserBigramDictionary.close();
-            mUserBigramDictionary = null;
-        }
-        if (mContactsDictionary != null) {
-            mContactsDictionary.close();
-            mContactsDictionary = null;
-        }
-        if (mAutoDictionary != null) {
-            mAutoDictionary.close();
-            mAutoDictionary = null;
+        final Set<Dictionary> dictionaries = new HashSet<Dictionary>();
+        dictionaries.addAll(mUnigramDictionaries.values());
+        dictionaries.addAll(mBigramDictionaries.values());
+        for (final Dictionary dictionary : dictionaries) {
+            dictionary.close();
         }
+        mMainDict = null;
     }
 }