aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/Suggest.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rw-r--r--java/src/com/android/inputmethod/latin/Suggest.java309
1 files changed, 163 insertions, 146 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index ced355bb2..0de474e59 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -22,8 +22,13 @@ import android.text.TextUtils;
import android.util.Log;
import android.view.View;
+import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
/**
* This class loads a dictionary and provides a list of suggestions for a given sequence of
@@ -62,40 +67,37 @@ public class Suggest implements Dictionary.WordCallback {
// If you add a type of dictionary, increment DIC_TYPE_LAST_ID
public static final int DIC_TYPE_LAST_ID = 4;
- static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
-
- private static boolean DBG = LatinImeLogger.sDBG;
-
- private BinaryDictionary mMainDict;
+ public static final String DICT_KEY_MAIN = "main";
+ public static final String DICT_KEY_CONTACTS = "contacts";
+ public static final String DICT_KEY_AUTO = "auto";
+ public static final String DICT_KEY_USER = "user";
+ public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
+ public static final String DICT_KEY_WHITELIST ="whitelist";
- private Dictionary mUserDictionary;
+ static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
- private Dictionary mAutoDictionary;
+ private static final boolean DBG = LatinImeLogger.sDBG;
- private Dictionary mContactsDictionary;
+ private AutoCorrection mAutoCorrection;
- private Dictionary mUserBigramDictionary;
+ private BinaryDictionary mMainDict;
+ private WhitelistDictionary mWhiteListDictionary;
+ private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>();
+ private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>();
private int mPrefMaxSuggestions = 12;
private static final int PREF_MAX_BIGRAMS = 60;
- private boolean mAutoTextEnabled;
+ private boolean mQuickFixesEnabled;
private double mAutoCorrectionThreshold;
private int[] mPriorities = new int[mPrefMaxSuggestions];
private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS];
- // Handle predictive correction for only the first 1280 characters for performance reasons
- // If we support scripts that need latin characters beyond that, we should probably use some
- // kind of a sparse array or language specific list with a mapping lookup table.
- // 1280 is the size of the BASE_CHARS array in ExpandableDictionary, which is a basic set of
- // latin characters.
- private int[] mNextLettersFrequencies = new int[1280];
private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
- private boolean mHasAutoCorrection;
private String mLowerOriginalWord;
// TODO: Remove these member variables by passing more context to addWord() callback method
@@ -105,7 +107,24 @@ public class Suggest implements Dictionary.WordCallback {
private int mCorrectionMode = CORRECTION_BASIC;
public Suggest(Context context, int dictionaryResId) {
- mMainDict = BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN);
+ init(context, BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN));
+ }
+
+ /* package for test */ Suggest(File dictionary, long startOffset, long length) {
+ init(null, BinaryDictionary.initDictionary(dictionary, startOffset, length, DIC_MAIN));
+ }
+
+ private void init(Context context, BinaryDictionary mainDict) {
+ if (mainDict != null) {
+ mMainDict = mainDict;
+ mUnigramDictionaries.put(DICT_KEY_MAIN, mainDict);
+ mBigramDictionaries.put(DICT_KEY_MAIN, mainDict);
+ }
+ mWhiteListDictionary = WhitelistDictionary.init(context);
+ if (mWhiteListDictionary != null) {
+ mUnigramDictionaries.put(DICT_KEY_WHITELIST, mWhiteListDictionary);
+ }
+ mAutoCorrection = new AutoCorrection();
initPool();
}
@@ -116,8 +135,8 @@ public class Suggest implements Dictionary.WordCallback {
}
}
- public void setAutoTextEnabled(boolean enabled) {
- mAutoTextEnabled = enabled;
+ public void setQuickFixesEnabled(boolean enabled) {
+ mQuickFixesEnabled = enabled;
}
public int getCorrectionMode() {
@@ -132,6 +151,10 @@ public class Suggest implements Dictionary.WordCallback {
return mMainDict != null && mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
}
+ public Map<String, Dictionary> getUnigramDictionaries() {
+ return mUnigramDictionaries;
+ }
+
public int getApproxMaxWordLength() {
return APPROX_MAX_WORD_LENGTH;
}
@@ -141,22 +164,28 @@ public class Suggest implements Dictionary.WordCallback {
* before the main dictionary, if set.
*/
public void setUserDictionary(Dictionary userDictionary) {
- mUserDictionary = userDictionary;
+ if (userDictionary != null)
+ mUnigramDictionaries.put(DICT_KEY_USER, userDictionary);
}
/**
* Sets an optional contacts dictionary resource to be loaded.
*/
- public void setContactsDictionary(Dictionary userDictionary) {
- mContactsDictionary = userDictionary;
+ public void setContactsDictionary(Dictionary contactsDictionary) {
+ if (contactsDictionary != null) {
+ mUnigramDictionaries.put(DICT_KEY_CONTACTS, contactsDictionary);
+ mBigramDictionaries.put(DICT_KEY_CONTACTS, contactsDictionary);
+ }
}
public void setAutoDictionary(Dictionary autoDictionary) {
- mAutoDictionary = autoDictionary;
+ if (autoDictionary != null)
+ mUnigramDictionaries.put(DICT_KEY_AUTO, autoDictionary);
}
public void setUserBigramDictionary(Dictionary userBigramDictionary) {
- mUserBigramDictionary = userBigramDictionary;
+ if (userBigramDictionary != null)
+ mBigramDictionaries.put(DICT_KEY_USER_BIGRAM, userBigramDictionary);
}
public void setAutoCorrectionThreshold(double threshold) {
@@ -200,16 +229,34 @@ public class Suggest implements Dictionary.WordCallback {
return getSuggestedWordBuilder(view, wordComposer, prevWordForBigram).build();
}
+ private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) {
+ if (TextUtils.isEmpty(word) || !(all || first)) return word;
+ final int wordLength = word.length();
+ final int poolSize = mStringPool.size();
+ final StringBuilder sb =
+ poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
+ : new StringBuilder(getApproxMaxWordLength());
+ sb.setLength(0);
+ if (all) {
+ sb.append(word.toString().toUpperCase());
+ } else if (first) {
+ sb.append(Character.toUpperCase(word.charAt(0)));
+ if (wordLength > 1) {
+ sb.append(word.subSequence(1, wordLength));
+ }
+ }
+ return sb;
+ }
+
// TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
public SuggestedWords.Builder getSuggestedWordBuilder(View view, WordComposer wordComposer,
CharSequence prevWordForBigram) {
LatinImeLogger.onStartSuggestion(prevWordForBigram);
- mHasAutoCorrection = false;
+ mAutoCorrection.init();
mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
mIsAllUpperCase = wordComposer.isAllUpperCase();
collectGarbage(mSuggestions, mPrefMaxSuggestions);
Arrays.fill(mPriorities, 0);
- Arrays.fill(mNextLettersFrequencies, 0);
// Save a lowercase version of the original word
CharSequence typedWord = wordComposer.getTypedWord();
@@ -235,17 +282,8 @@ public class Suggest implements Dictionary.WordCallback {
if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
prevWordForBigram = lowerPrevWord;
}
- if (mUserBigramDictionary != null) {
- mUserBigramDictionary.getBigrams(wordComposer, prevWordForBigram, this,
- mNextLettersFrequencies);
- }
- if (mContactsDictionary != null) {
- mContactsDictionary.getBigrams(wordComposer, prevWordForBigram, this,
- mNextLettersFrequencies);
- }
- if (mMainDict != null) {
- mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
- mNextLettersFrequencies);
+ for (final Dictionary dictionary : mBigramDictionaries.values()) {
+ dictionary.getBigrams(wordComposer, prevWordForBigram, this);
}
char currentChar = wordComposer.getTypedWord().charAt(0);
char currentCharUpper = Character.toUpperCase(currentChar);
@@ -268,97 +306,86 @@ public class Suggest implements Dictionary.WordCallback {
} else if (wordComposer.size() > 1) {
// At second character typed, search the unigrams (scores being affected by bigrams)
- if (mUserDictionary != null || mContactsDictionary != null) {
- if (mUserDictionary != null) {
- mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
- }
- if (mContactsDictionary != null) {
- mContactsDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
- }
-
- if (mSuggestions.size() > 0 && isValidWord(typedWord)
- && (mCorrectionMode == CORRECTION_FULL
- || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
- if (DBG) {
- Log.d(TAG, "Auto corrected by CORRECTION_FULL.");
- }
- mHasAutoCorrection = true;
- }
- }
- if (mMainDict != null) mMainDict.getWords(wordComposer, this, mNextLettersFrequencies);
- if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM)
- && mSuggestions.size() > 0 && mPriorities.length > 0) {
- // TODO: when the normalized score of the first suggestion is nearly equals to
- // the normalized score of the second suggestion, behave less aggressive.
- final double normalizedScore = Utils.calcNormalizedScore(
- typedWord, mSuggestions.get(0), mPriorities[0]);
- if (LatinImeLogger.sDBG) {
- Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
- + mPriorities[0] + ", " + normalizedScore
- + "(" + mAutoCorrectionThreshold + ")");
- }
- if (normalizedScore >= mAutoCorrectionThreshold) {
- if (DBG) {
- Log.d(TAG, "Auto corrected by S-threthhold.");
- }
- mHasAutoCorrection = true;
- }
+ for (final String key : mUnigramDictionaries.keySet()) {
+ // Skip AutoDictionary and WhitelistDictionary to lookup
+ if (key.equals(DICT_KEY_AUTO) || key.equals(DICT_KEY_WHITELIST))
+ continue;
+ final Dictionary dictionary = mUnigramDictionaries.get(key);
+ dictionary.getWords(wordComposer, this);
}
}
+ CharSequence autoText = null;
+ final String typedWordString = typedWord == null ? null : typedWord.toString();
if (typedWord != null) {
- mSuggestions.add(0, typedWord.toString());
- }
- if (mAutoTextEnabled) {
- int i = 0;
- int max = 6;
- // Don't autotext the suggestions from the dictionaries
- if (mCorrectionMode == CORRECTION_BASIC) max = 1;
- while (i < mSuggestions.size() && i < max) {
- String suggestedWord = mSuggestions.get(i).toString().toLowerCase();
- CharSequence autoText =
- AutoText.get(suggestedWord, 0, suggestedWord.length(), view);
+ // Apply quick fix only for the typed word.
+ if (mQuickFixesEnabled) {
+ final String lowerCaseTypedWord = typedWordString.toLowerCase();
+ CharSequence tempAutoText = capitalizeWord(
+ mIsAllUpperCase, mIsFirstCharCapitalized, AutoText.get(
+ lowerCaseTypedWord, 0, lowerCaseTypedWord.length(), view));
+ // TODO: cleanup canAdd
// Is there an AutoText (also known as Quick Fixes) correction?
- boolean canAdd = autoText != null;
// Capitalize as needed
- final int autoTextLength = autoText != null ? autoText.length() : 0;
- if (autoTextLength > 0 && (mIsAllUpperCase || mIsFirstCharCapitalized)) {
- int poolSize = mStringPool.size();
- StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(
- poolSize - 1) : new StringBuilder(getApproxMaxWordLength());
- sb.setLength(0);
- if (mIsAllUpperCase) {
- sb.append(autoText.toString().toUpperCase());
- } else if (mIsFirstCharCapitalized) {
- sb.append(Character.toUpperCase(autoText.charAt(0)));
- if (autoTextLength > 1) {
- sb.append(autoText.subSequence(1, autoTextLength));
- }
- }
- autoText = sb.toString();
- }
+ boolean canAdd = tempAutoText != null;
// Is that correction already the current prediction (or original word)?
- canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i));
+ canAdd &= !TextUtils.equals(tempAutoText, typedWord);
// Is that correction already the next predicted word?
- if (canAdd && i + 1 < mSuggestions.size() && mCorrectionMode != CORRECTION_BASIC) {
- canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i + 1));
+ if (canAdd && mSuggestions.size() > 0 && mCorrectionMode != CORRECTION_BASIC) {
+ canAdd &= !TextUtils.equals(tempAutoText, mSuggestions.get(0));
}
if (canAdd) {
if (DBG) {
Log.d(TAG, "Auto corrected by AUTOTEXT.");
}
- mHasAutoCorrection = true;
- mSuggestions.add(i + 1, autoText);
- i++;
+ autoText = tempAutoText;
}
- i++;
}
}
+
+ CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
+ mWhiteListDictionary.getWhiteListedWord(typedWordString));
+
+ mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries, wordComposer,
+ mSuggestions, mPriorities, typedWord, mAutoCorrectionThreshold, mCorrectionMode,
+ autoText, whitelistedWord);
+
+ if (autoText != null) {
+ mSuggestions.add(0, autoText);
+ }
+
+ if (whitelistedWord != null) {
+ mSuggestions.add(0, whitelistedWord);
+ }
+
+ if (typedWord != null) {
+ mSuggestions.add(0, typedWordString);
+ }
removeDupes();
- return new SuggestedWords.Builder().addWords(mSuggestions, null);
- }
- public int[] getNextLettersFrequencies() {
- return mNextLettersFrequencies;
+ if (DBG) {
+ double normalizedScore = mAutoCorrection.getNormalizedScore();
+ ArrayList<SuggestedWords.SuggestedWordInfo> frequencyInfoList =
+ new ArrayList<SuggestedWords.SuggestedWordInfo>();
+ frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
+ final int priorityLength = mPriorities.length;
+ for (int i = 0; i < priorityLength; ++i) {
+ if (normalizedScore > 0) {
+ final String priorityThreshold = Integer.toString(mPriorities[i]) + " (" +
+ normalizedScore + ")";
+ frequencyInfoList.add(
+ new SuggestedWords.SuggestedWordInfo(priorityThreshold, false));
+ normalizedScore = 0.0;
+ } else {
+ final String priority = Integer.toString(mPriorities[i]);
+ frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo(priority, false));
+ }
+ }
+ for (int i = priorityLength; i < mSuggestions.size(); ++i) {
+ frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
+ }
+ return new SuggestedWords.Builder().addWords(mSuggestions, frequencyInfoList);
+ }
+ return new SuggestedWords.Builder().addWords(mSuggestions, null);
}
private void removeDupes() {
@@ -389,15 +416,15 @@ public class Suggest implements Dictionary.WordCallback {
}
public boolean hasAutoCorrection() {
- return mHasAutoCorrection;
+ return mAutoCorrection.hasAutoCorrection();
}
- private boolean compareCaseInsensitive(final String mLowerOriginalWord,
+ private static boolean compareCaseInsensitive(final String lowerOriginalWord,
final char[] word, final int offset, final int length) {
- final int originalLength = mLowerOriginalWord.length();
+ final int originalLength = lowerOriginalWord.length();
if (originalLength == length && Character.isUpperCase(word[offset])) {
for (int i = 0; i < originalLength; i++) {
- if (mLowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
+ if (lowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
return false;
}
}
@@ -427,7 +454,20 @@ public class Suggest implements Dictionary.WordCallback {
// Check if it's the same word, only caps are different
if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
- pos = 0;
+ // TODO: remove this surrounding if clause and move this logic to
+ // getSuggestedWordBuilder.
+ if (suggestions.size() > 0) {
+ final String currentHighestWordLowerCase =
+ suggestions.get(0).toString().toLowerCase();
+ // If the current highest word is also equal to typed word, we need to compare
+ // frequency to determine the insertion position. This does not ensure strictly
+ // correct ordering, but ensures the top score is on top which is enough for
+ // removing duplicates correctly.
+ if (compareCaseInsensitive(currentHighestWordLowerCase, word, offset, length)
+ && freq <= priorities[0]) {
+ pos = 1;
+ }
+ }
} else {
if (dataType == Dictionary.DataType.UNIGRAM) {
// Check if the word was already added before (by bigram data)
@@ -510,16 +550,6 @@ public class Suggest implements Dictionary.WordCallback {
return -1;
}
- public boolean isValidWord(final CharSequence word) {
- if (word == null || word.length() == 0 || mMainDict == null) {
- return false;
- }
- return mMainDict.isValidWord(word)
- || (mUserDictionary != null && mUserDictionary.isValidWord(word))
- || (mAutoDictionary != null && mAutoDictionary.isValidWord(word))
- || (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
- }
-
private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) {
int poolSize = mStringPool.size();
int garbageSize = suggestions.size();
@@ -538,25 +568,12 @@ public class Suggest implements Dictionary.WordCallback {
}
public void close() {
- if (mMainDict != null) {
- mMainDict.close();
- mMainDict = null;
- }
- if (mUserDictionary != null) {
- mUserDictionary.close();
- mUserDictionary = null;
- }
- if (mUserBigramDictionary != null) {
- mUserBigramDictionary.close();
- mUserBigramDictionary = null;
- }
- if (mContactsDictionary != null) {
- mContactsDictionary.close();
- mContactsDictionary = null;
- }
- if (mAutoDictionary != null) {
- mAutoDictionary.close();
- mAutoDictionary = null;
+ final Set<Dictionary> dictionaries = new HashSet<Dictionary>();
+ dictionaries.addAll(mUnigramDictionaries.values());
+ dictionaries.addAll(mBigramDictionaries.values());
+ for (final Dictionary dictionary : dictionaries) {
+ dictionary.close();
}
+ mMainDict = null;
}
}