about | summary | refs | log | tree | commit | diff | stats
path: root/java/src/com/android/inputmethod/latin/Suggest.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rw-r--r-- [-rwxr-xr-x] java/src/com/android/inputmethod/latin/Suggest.java | 375
1 files changed, 210 insertions, 165 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index 3b898941f..0de474e59 100755..100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -1,12 +1,12 @@
/*
* Copyright (C) 2008 The Android Open Source Project
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -16,24 +16,28 @@
package com.android.inputmethod.latin;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
import android.content.Context;
import android.text.AutoText;
import android.text.TextUtils;
import android.util.Log;
import android.view.View;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
/**
- * This class loads a dictionary and provides a list of suggestions for a given sequence of
+ * This class loads a dictionary and provides a list of suggestions for a given sequence of
* characters. This includes corrections and completions.
- * @hide pending API Council Approval
*/
public class Suggest implements Dictionary.WordCallback {
+ public static final String TAG = Suggest.class.getSimpleName();
+
public static final int APPROX_MAX_WORD_LENGTH = 32;
public static final int CORRECTION_NONE = 0;
@@ -63,38 +67,37 @@ public class Suggest implements Dictionary.WordCallback {
// If you add a type of dictionary, increment DIC_TYPE_LAST_ID
public static final int DIC_TYPE_LAST_ID = 4;
- static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
+ public static final String DICT_KEY_MAIN = "main";
+ public static final String DICT_KEY_CONTACTS = "contacts";
+ public static final String DICT_KEY_AUTO = "auto";
+ public static final String DICT_KEY_USER = "user";
+ public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
+ public static final String DICT_KEY_WHITELIST ="whitelist";
- private BinaryDictionary mMainDict;
-
- private Dictionary mUserDictionary;
+ static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
- private Dictionary mAutoDictionary;
+ private static final boolean DBG = LatinImeLogger.sDBG;
- private Dictionary mContactsDictionary;
+ private AutoCorrection mAutoCorrection;
- private Dictionary mUserBigramDictionary;
+ private BinaryDictionary mMainDict;
+ private WhitelistDictionary mWhiteListDictionary;
+ private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>();
+ private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>();
private int mPrefMaxSuggestions = 12;
private static final int PREF_MAX_BIGRAMS = 60;
- private boolean mAutoTextEnabled;
+ private boolean mQuickFixesEnabled;
+ private double mAutoCorrectionThreshold;
private int[] mPriorities = new int[mPrefMaxSuggestions];
private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS];
- // Handle predictive correction for only the first 1280 characters for performance reasons
- // If we support scripts that need latin characters beyond that, we should probably use some
- // kind of a sparse array or language specific list with a mapping lookup table.
- // 1280 is the size of the BASE_CHARS array in ExpandableDictionary, which is a basic set of
- // latin characters.
- private int[] mNextLettersFrequencies = new int[1280];
private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
- private boolean mHaveCorrection;
- private CharSequence mOriginalWord;
private String mLowerOriginalWord;
// TODO: Remove these member variables by passing more context to addWord() callback method
@@ -103,13 +106,25 @@ public class Suggest implements Dictionary.WordCallback {
private int mCorrectionMode = CORRECTION_BASIC;
- public Suggest(Context context, int[] dictionaryResId) {
- mMainDict = new BinaryDictionary(context, dictionaryResId, DIC_MAIN);
- initPool();
+ public Suggest(Context context, int dictionaryResId) {
+ init(context, BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN));
}
- public Suggest(Context context, ByteBuffer byteBuffer) {
- mMainDict = new BinaryDictionary(context, byteBuffer, DIC_MAIN);
+ /* package for test */ Suggest(File dictionary, long startOffset, long length) {
+ init(null, BinaryDictionary.initDictionary(dictionary, startOffset, length, DIC_MAIN));
+ }
+
+ private void init(Context context, BinaryDictionary mainDict) {
+ if (mainDict != null) {
+ mMainDict = mainDict;
+ mUnigramDictionaries.put(DICT_KEY_MAIN, mainDict);
+ mBigramDictionaries.put(DICT_KEY_MAIN, mainDict);
+ }
+ mWhiteListDictionary = WhitelistDictionary.init(context);
+ if (mWhiteListDictionary != null) {
+ mUnigramDictionaries.put(DICT_KEY_WHITELIST, mWhiteListDictionary);
+ }
+ mAutoCorrection = new AutoCorrection();
initPool();
}
@@ -120,8 +135,8 @@ public class Suggest implements Dictionary.WordCallback {
}
}
- public void setAutoTextEnabled(boolean enabled) {
- mAutoTextEnabled = enabled;
+ public void setQuickFixesEnabled(boolean enabled) {
+ mQuickFixesEnabled = enabled;
}
public int getCorrectionMode() {
@@ -133,7 +148,11 @@ public class Suggest implements Dictionary.WordCallback {
}
public boolean hasMainDictionary() {
- return mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
+ return mMainDict != null && mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
+ }
+
+ public Map<String, Dictionary> getUnigramDictionaries() {
+ return mUnigramDictionaries;
}
public int getApproxMaxWordLength() {
@@ -145,22 +164,36 @@ public class Suggest implements Dictionary.WordCallback {
* before the main dictionary, if set.
*/
public void setUserDictionary(Dictionary userDictionary) {
- mUserDictionary = userDictionary;
+ if (userDictionary != null)
+ mUnigramDictionaries.put(DICT_KEY_USER, userDictionary);
}
/**
* Sets an optional contacts dictionary resource to be loaded.
*/
- public void setContactsDictionary(Dictionary userDictionary) {
- mContactsDictionary = userDictionary;
+ public void setContactsDictionary(Dictionary contactsDictionary) {
+ if (contactsDictionary != null) {
+ mUnigramDictionaries.put(DICT_KEY_CONTACTS, contactsDictionary);
+ mBigramDictionaries.put(DICT_KEY_CONTACTS, contactsDictionary);
+ }
}
-
+
public void setAutoDictionary(Dictionary autoDictionary) {
- mAutoDictionary = autoDictionary;
+ if (autoDictionary != null)
+ mUnigramDictionaries.put(DICT_KEY_AUTO, autoDictionary);
}
public void setUserBigramDictionary(Dictionary userBigramDictionary) {
- mUserBigramDictionary = userBigramDictionary;
+ if (userBigramDictionary != null)
+ mBigramDictionaries.put(DICT_KEY_USER_BIGRAM, userBigramDictionary);
+ }
+
+ public void setAutoCorrectionThreshold(double threshold) {
+ mAutoCorrectionThreshold = threshold;
+ }
+
+ public boolean isAggressiveAutoCorrectionMode() {
+ return (mAutoCorrectionThreshold == 0);
}
/**
@@ -183,59 +216,56 @@ public class Suggest implements Dictionary.WordCallback {
}
}
- private boolean haveSufficientCommonality(String original, CharSequence suggestion) {
- final int originalLength = original.length();
- final int suggestionLength = suggestion.length();
- final int minLength = Math.min(originalLength, suggestionLength);
- if (minLength <= 2) return true;
- int matching = 0;
- int lessMatching = 0; // Count matches if we skip one character
- int i;
- for (i = 0; i < minLength; i++) {
- final char origChar = ExpandableDictionary.toLowerCase(original.charAt(i));
- if (origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i))) {
- matching++;
- lessMatching++;
- } else if (i + 1 < suggestionLength
- && origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i + 1))) {
- lessMatching++;
- }
- }
- matching = Math.max(matching, lessMatching);
-
- if (minLength <= 4) {
- return matching >= 2;
- } else {
- return matching > minLength / 2;
- }
- }
-
/**
- * Returns a list of words that match the list of character codes passed in.
- * This list will be overwritten the next time this function is called.
+ * Returns an object which represents suggested words that match the list of character codes
+ * passed in. This object's contents will be overwritten the next time this function is called.
* @param view a view for retrieving the context for AutoText
* @param wordComposer contains what is currently being typed
* @param prevWordForBigram previous word (used only for bigram)
- * @return list of suggestions.
+ * @return suggested words object.
*/
- public List<CharSequence> getSuggestions(View view, WordComposer wordComposer,
- boolean includeTypedWordIfValid, CharSequence prevWordForBigram) {
+ public SuggestedWords getSuggestions(View view, WordComposer wordComposer,
+ CharSequence prevWordForBigram) {
+ return getSuggestedWordBuilder(view, wordComposer, prevWordForBigram).build();
+ }
+
+ private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) {
+ if (TextUtils.isEmpty(word) || !(all || first)) return word;
+ final int wordLength = word.length();
+ final int poolSize = mStringPool.size();
+ final StringBuilder sb =
+ poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
+ : new StringBuilder(getApproxMaxWordLength());
+ sb.setLength(0);
+ if (all) {
+ sb.append(word.toString().toUpperCase());
+ } else if (first) {
+ sb.append(Character.toUpperCase(word.charAt(0)));
+ if (wordLength > 1) {
+ sb.append(word.subSequence(1, wordLength));
+ }
+ }
+ return sb;
+ }
+
+ // TODO: clean up dictionary lookup and suggestion building with SuggestedWords.Builder
+ public SuggestedWords.Builder getSuggestedWordBuilder(View view, WordComposer wordComposer,
+ CharSequence prevWordForBigram) {
LatinImeLogger.onStartSuggestion(prevWordForBigram);
- mHaveCorrection = false;
+ mAutoCorrection.init();
mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
mIsAllUpperCase = wordComposer.isAllUpperCase();
collectGarbage(mSuggestions, mPrefMaxSuggestions);
Arrays.fill(mPriorities, 0);
- Arrays.fill(mNextLettersFrequencies, 0);
// Save a lowercase version of the original word
- mOriginalWord = wordComposer.getTypedWord();
- if (mOriginalWord != null) {
- final String mOriginalWordString = mOriginalWord.toString();
- mOriginalWord = mOriginalWordString;
- mLowerOriginalWord = mOriginalWordString.toLowerCase();
+ CharSequence typedWord = wordComposer.getTypedWord();
+ if (typedWord != null) {
+ final String typedWordString = typedWord.toString();
+ typedWord = typedWordString;
+ mLowerOriginalWord = typedWordString.toLowerCase();
// Treating USER_TYPED as UNIGRAM suggestion for logging now.
- LatinImeLogger.onAddSuggestedWord(mOriginalWordString, Suggest.DIC_USER_TYPED,
+ LatinImeLogger.onAddSuggestedWord(typedWordString, Suggest.DIC_USER_TYPED,
Dictionary.DataType.UNIGRAM);
} else {
mLowerOriginalWord = "";
@@ -249,20 +279,11 @@ public class Suggest implements Dictionary.WordCallback {
if (!TextUtils.isEmpty(prevWordForBigram)) {
CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
- if (mMainDict.isValidWord(lowerPrevWord)) {
+ if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
prevWordForBigram = lowerPrevWord;
}
- if (mUserBigramDictionary != null) {
- mUserBigramDictionary.getBigrams(wordComposer, prevWordForBigram, this,
- mNextLettersFrequencies);
- }
- if (mContactsDictionary != null) {
- mContactsDictionary.getBigrams(wordComposer, prevWordForBigram, this,
- mNextLettersFrequencies);
- }
- if (mMainDict != null) {
- mMainDict.getBigrams(wordComposer, prevWordForBigram, this,
- mNextLettersFrequencies);
+ for (final Dictionary dictionary : mBigramDictionaries.values()) {
+ dictionary.getBigrams(wordComposer, prevWordForBigram, this);
}
char currentChar = wordComposer.getTypedWord().charAt(0);
char currentCharUpper = Character.toUpperCase(currentChar);
@@ -285,69 +306,86 @@ public class Suggest implements Dictionary.WordCallback {
} else if (wordComposer.size() > 1) {
// At second character typed, search the unigrams (scores being affected by bigrams)
- if (mUserDictionary != null || mContactsDictionary != null) {
- if (mUserDictionary != null) {
- mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
- }
- if (mContactsDictionary != null) {
- mContactsDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
- }
-
- if (mSuggestions.size() > 0 && isValidWord(mOriginalWord)
- && (mCorrectionMode == CORRECTION_FULL
- || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
- mHaveCorrection = true;
- }
- }
- mMainDict.getWords(wordComposer, this, mNextLettersFrequencies);
- if ((mCorrectionMode == CORRECTION_FULL || mCorrectionMode == CORRECTION_FULL_BIGRAM)
- && mSuggestions.size() > 0) {
- mHaveCorrection = true;
- }
- }
- if (mOriginalWord != null) {
- mSuggestions.add(0, mOriginalWord.toString());
- }
-
- // Check if the first suggestion has a minimum number of characters in common
- if (wordComposer.size() > 1 && mSuggestions.size() > 1
- && (mCorrectionMode == CORRECTION_FULL
- || mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
- if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) {
- mHaveCorrection = false;
+ for (final String key : mUnigramDictionaries.keySet()) {
+ // Skip AutoDictionary and WhitelistDictionary when looking up words
+ if (key.equals(DICT_KEY_AUTO) || key.equals(DICT_KEY_WHITELIST))
+ continue;
+ final Dictionary dictionary = mUnigramDictionaries.get(key);
+ dictionary.getWords(wordComposer, this);
}
}
- if (mAutoTextEnabled) {
- int i = 0;
- int max = 6;
- // Don't autotext the suggestions from the dictionaries
- if (mCorrectionMode == CORRECTION_BASIC) max = 1;
- while (i < mSuggestions.size() && i < max) {
- String suggestedWord = mSuggestions.get(i).toString().toLowerCase();
- CharSequence autoText =
- AutoText.get(suggestedWord, 0, suggestedWord.length(), view);
- // Is there an AutoText correction?
- boolean canAdd = autoText != null;
+ CharSequence autoText = null;
+ final String typedWordString = typedWord == null ? null : typedWord.toString();
+ if (typedWord != null) {
+ // Apply quick fix only for the typed word.
+ if (mQuickFixesEnabled) {
+ final String lowerCaseTypedWord = typedWordString.toLowerCase();
+ CharSequence tempAutoText = capitalizeWord(
+ mIsAllUpperCase, mIsFirstCharCapitalized, AutoText.get(
+ lowerCaseTypedWord, 0, lowerCaseTypedWord.length(), view));
+ // TODO: cleanup canAdd
+ // Is there an AutoText (also known as Quick Fixes) correction?
+ // Capitalize as needed
+ boolean canAdd = tempAutoText != null;
// Is that correction already the current prediction (or original word)?
- canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i));
+ canAdd &= !TextUtils.equals(tempAutoText, typedWord);
// Is that correction already the next predicted word?
- if (canAdd && i + 1 < mSuggestions.size() && mCorrectionMode != CORRECTION_BASIC) {
- canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i + 1));
+ if (canAdd && mSuggestions.size() > 0 && mCorrectionMode != CORRECTION_BASIC) {
+ canAdd &= !TextUtils.equals(tempAutoText, mSuggestions.get(0));
}
if (canAdd) {
- mHaveCorrection = true;
- mSuggestions.add(i + 1, autoText);
- i++;
+ if (DBG) {
+ Log.d(TAG, "Auto corrected by AUTOTEXT.");
+ }
+ autoText = tempAutoText;
}
- i++;
}
}
+
+ CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
+ mWhiteListDictionary.getWhiteListedWord(typedWordString));
+
+ mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries, wordComposer,
+ mSuggestions, mPriorities, typedWord, mAutoCorrectionThreshold, mCorrectionMode,
+ autoText, whitelistedWord);
+
+ if (autoText != null) {
+ mSuggestions.add(0, autoText);
+ }
+
+ if (whitelistedWord != null) {
+ mSuggestions.add(0, whitelistedWord);
+ }
+
+ if (typedWord != null) {
+ mSuggestions.add(0, typedWordString);
+ }
removeDupes();
- return mSuggestions;
- }
- public int[] getNextLettersFrequencies() {
- return mNextLettersFrequencies;
+ if (DBG) {
+ double normalizedScore = mAutoCorrection.getNormalizedScore();
+ ArrayList<SuggestedWords.SuggestedWordInfo> frequencyInfoList =
+ new ArrayList<SuggestedWords.SuggestedWordInfo>();
+ frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
+ final int priorityLength = mPriorities.length;
+ for (int i = 0; i < priorityLength; ++i) {
+ if (normalizedScore > 0) {
+ final String priorityThreshold = Integer.toString(mPriorities[i]) + " (" +
+ normalizedScore + ")";
+ frequencyInfoList.add(
+ new SuggestedWords.SuggestedWordInfo(priorityThreshold, false));
+ normalizedScore = 0.0;
+ } else {
+ final String priority = Integer.toString(mPriorities[i]);
+ frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo(priority, false));
+ }
+ }
+ for (int i = priorityLength; i < mSuggestions.size(); ++i) {
+ frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
+ }
+ return new SuggestedWords.Builder().addWords(mSuggestions, frequencyInfoList);
+ }
+ return new SuggestedWords.Builder().addWords(mSuggestions, null);
}
private void removeDupes() {
@@ -377,16 +415,16 @@ public class Suggest implements Dictionary.WordCallback {
}
}
- public boolean hasMinimalCorrection() {
- return mHaveCorrection;
+ public boolean hasAutoCorrection() {
+ return mAutoCorrection.hasAutoCorrection();
}
- private boolean compareCaseInsensitive(final String mLowerOriginalWord,
+ private static boolean compareCaseInsensitive(final String lowerOriginalWord,
final char[] word, final int offset, final int length) {
- final int originalLength = mLowerOriginalWord.length();
+ final int originalLength = lowerOriginalWord.length();
if (originalLength == length && Character.isUpperCase(word[offset])) {
for (int i = 0; i < originalLength; i++) {
- if (mLowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
+ if (lowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
return false;
}
}
@@ -395,6 +433,7 @@ public class Suggest implements Dictionary.WordCallback {
return false;
}
+ @Override
public boolean addWord(final char[] word, final int offset, final int length, int freq,
final int dicTypeId, final Dictionary.DataType dataType) {
Dictionary.DataType dataTypeForLog = dataType;
@@ -415,7 +454,20 @@ public class Suggest implements Dictionary.WordCallback {
// Check if it's the same word, only caps are different
if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
- pos = 0;
+ // TODO: remove this surrounding if clause and move this logic to
+ // getSuggestedWordBuilder.
+ if (suggestions.size() > 0) {
+ final String currentHighestWordLowerCase =
+ suggestions.get(0).toString().toLowerCase();
+ // If the current highest word is also equal to typed word, we need to compare
+ // frequency to determine the insertion position. This does not ensure strictly
+ // correct ordering, but ensures the top score is on top which is enough for
+ // removing duplicates correctly.
+ if (compareCaseInsensitive(currentHighestWordLowerCase, word, offset, length)
+ && freq <= priorities[0]) {
+ pos = 1;
+ }
+ }
} else {
if (dataType == Dictionary.DataType.UNIGRAM) {
// Check if the word was already added before (by bigram data)
@@ -450,11 +502,10 @@ public class Suggest implements Dictionary.WordCallback {
return true;
}
- System.arraycopy(priorities, pos, priorities, pos + 1,
- prefMaxSuggestions - pos - 1);
+ System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1);
priorities[pos] = freq;
int poolSize = mStringPool.size();
- StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
+ StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
: new StringBuilder(getApproxMaxWordLength());
sb.setLength(0);
if (mIsAllUpperCase) {
@@ -499,16 +550,6 @@ public class Suggest implements Dictionary.WordCallback {
return -1;
}
- public boolean isValidWord(final CharSequence word) {
- if (word == null || word.length() == 0) {
- return false;
- }
- return mMainDict.isValidWord(word)
- || (mUserDictionary != null && mUserDictionary.isValidWord(word))
- || (mAutoDictionary != null && mAutoDictionary.isValidWord(word))
- || (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
- }
-
private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) {
int poolSize = mStringPool.size();
int garbageSize = suggestions.size();
@@ -527,8 +568,12 @@ public class Suggest implements Dictionary.WordCallback {
}
public void close() {
- if (mMainDict != null) {
- mMainDict.close();
+ final Set<Dictionary> dictionaries = new HashSet<Dictionary>();
+ dictionaries.addAll(mUnigramDictionaries.values());
+ dictionaries.addAll(mBigramDictionaries.values());
+ for (final Dictionary dictionary : dictionaries) {
+ dictionary.close();
}
+ mMainDict = null;
}
}