diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/Suggest.java | 442 |
1 files changed, 245 insertions, 197 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index caa5aac51..b31f3019c 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -20,31 +20,28 @@ import android.content.Context; import android.text.TextUtils; import android.util.Log; +import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.ProximityInfo; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import java.io.File; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Locale; -import java.util.Map; -import java.util.Set; /** * This class loads a dictionary and provides a list of suggestions for a given sequence of * characters. This includes corrections and completions. */ public class Suggest implements Dictionary.WordCallback { - public static final String TAG = Suggest.class.getSimpleName(); public static final int APPROX_MAX_WORD_LENGTH = 32; public static final int CORRECTION_NONE = 0; - public static final int CORRECTION_BASIC = 1; - public static final int CORRECTION_FULL = 2; - public static final int CORRECTION_FULL_BIGRAM = 3; + public static final int CORRECTION_FULL = 1; + public static final int CORRECTION_FULL_BIGRAM = 2; /** * Words that appear in both bigram and unigram data gets multiplier ranging from @@ -64,9 +61,8 @@ public class Suggest implements Dictionary.WordCallback { public static final int DIC_USER_TYPED = 0; public static final int DIC_MAIN = 1; public static final int DIC_USER = 2; - public static final int DIC_USER_UNIGRAM = 3; + public static final int DIC_USER_HISTORY = 3; public static final int DIC_CONTACTS = 4; - public static final int DIC_USER_BIGRAM = 5; public static final int DIC_WHITELIST = 6; // If you add a type of dictionary, increment DIC_TYPE_LAST_ID // TODO: this value seems unused. Remove it? @@ -75,39 +71,38 @@ public class Suggest implements Dictionary.WordCallback { public static final String DICT_KEY_CONTACTS = "contacts"; // User dictionary, the system-managed one. public static final String DICT_KEY_USER = "user"; - // User unigram dictionary, internal to LatinIME - public static final String DICT_KEY_USER_UNIGRAM = "user_unigram"; - // User bigram dictionary, internal to LatinIME - public static final String DICT_KEY_USER_BIGRAM = "user_bigram"; + // User history dictionary for the unigram map, internal to LatinIME + public static final String DICT_KEY_USER_HISTORY_UNIGRAM = "history_unigram"; + // User history dictionary for the bigram map, internal to LatinIME + public static final String DICT_KEY_USER_HISTORY_BIGRAM = "history_bigram"; public static final String DICT_KEY_WHITELIST ="whitelist"; private static final boolean DBG = LatinImeLogger.sDBG; - private AutoCorrection mAutoCorrection; - private Dictionary mMainDict; private ContactsDictionary mContactsDict; private WhitelistDictionary mWhiteListDictionary; - private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>(); - private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>(); + private final HashMap<String, Dictionary> mUnigramDictionaries = + new HashMap<String, Dictionary>(); + private final HashMap<String, Dictionary> mBigramDictionaries = + new HashMap<String, Dictionary>(); private int mPrefMaxSuggestions = 18; private static final int PREF_MAX_BIGRAMS = 60; private double mAutoCorrectionThreshold; - private int[] mScores = new int[mPrefMaxSuggestions]; - private int[] mBigramScores = new int[PREF_MAX_BIGRAMS]; - private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); - ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); - private CharSequence mTypedWord; + private ArrayList<SuggestedWordInfo> mSuggestions = new ArrayList<SuggestedWordInfo>(); + private ArrayList<SuggestedWordInfo> mBigramSuggestions = new ArrayList<SuggestedWordInfo>(); + private CharSequence mConsideredWord; // TODO: Remove these member variables by passing more context to addWord() callback method private boolean mIsFirstCharCapitalized; private boolean mIsAllUpperCase; + private int mTrailingSingleQuotesCount; - private int mCorrectionMode = CORRECTION_BASIC; + private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; public Suggest(final Context context, final int dictionaryResId, final Locale locale) { initAsynchronously(context, dictionaryResId, locale); @@ -116,15 +111,13 @@ public class Suggest implements Dictionary.WordCallback { /* package for test */ Suggest(final Context context, final File dictionary, final long startOffset, final long length, final Flag[] flagArray, final Locale locale) { - initSynchronously(null, DictionaryFactory.createDictionaryForTest(context, dictionary, + initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary, startOffset, length, flagArray), locale); } private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { mWhiteListDictionary = new WhitelistDictionary(context, locale); addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary); - mAutoCorrection = new AutoCorrection(); - StringBuilderPool.ensureCapacity(mPrefMaxSuggestions, getApproxMaxWordLength()); } private void initAsynchronously(final Context context, final int dictionaryResId, @@ -144,7 +137,7 @@ public class Suggest implements Dictionary.WordCallback { initWhitelistAndAutocorrectAndPool(context, locale); } - private void addOrReplaceDictionary(Map<String, Dictionary> dictionaries, String key, + private static void addOrReplaceDictionary(HashMap<String, Dictionary> dictionaries, String key, Dictionary dict) { final Dictionary oldDict = (dict == null) ? dictionaries.remove(key) @@ -169,14 +162,6 @@ public class Suggest implements Dictionary.WordCallback { }.start(); } - public int getCorrectionMode() { - return mCorrectionMode; - } - - public void setCorrectionMode(int mode) { - mCorrectionMode = mode; - } - // The main dictionary could have been loaded asynchronously. Don't cache the return value // of this method. public boolean hasMainDictionary() { @@ -187,11 +172,11 @@ public class Suggest implements Dictionary.WordCallback { return mContactsDict; } - public Map<String, Dictionary> getUnigramDictionaries() { + public HashMap<String, Dictionary> getUnigramDictionaries() { return mUnigramDictionaries; } - public int getApproxMaxWordLength() { + public static int getApproxMaxWordLength() { return APPROX_MAX_WORD_LENGTH; } @@ -214,56 +199,22 @@ public class Suggest implements Dictionary.WordCallback { addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); } - public void setUserUnigramDictionary(Dictionary userUnigramDictionary) { - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary); - } - - public void setUserBigramDictionary(Dictionary userBigramDictionary) { - addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary); + public void setUserHistoryDictionary(Dictionary userHistoryDictionary) { + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_HISTORY_UNIGRAM, + userHistoryDictionary); + addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_HISTORY_BIGRAM, + userHistoryDictionary); } public void setAutoCorrectionThreshold(double threshold) { mAutoCorrectionThreshold = threshold; } - public boolean isAggressiveAutoCorrectionMode() { - return (mAutoCorrectionThreshold == 0); - } - - /** - * Number of suggestions to generate from the input key sequence. This has - * to be a number between 1 and 100 (inclusive). - * @param maxSuggestions - * @throws IllegalArgumentException if the number is out of range - */ - public void setMaxSuggestions(int maxSuggestions) { - if (maxSuggestions < 1 || maxSuggestions > 100) { - throw new IllegalArgumentException("maxSuggestions must be between 1 and 100"); - } - mPrefMaxSuggestions = maxSuggestions; - mScores = new int[mPrefMaxSuggestions]; - mBigramScores = new int[PREF_MAX_BIGRAMS]; - collectGarbage(mSuggestions, mPrefMaxSuggestions); - StringBuilderPool.ensureCapacity(mPrefMaxSuggestions, getApproxMaxWordLength()); - } - - /** - * Returns a object which represents suggested words that match the list of character codes - * passed in. This object contents will be overwritten the next time this function is called. - * @param wordComposer contains what is currently being typed - * @param prevWordForBigram previous word (used only for bigram) - * @return suggested words object. - */ - public SuggestedWords getSuggestions(final WordComposer wordComposer, - final CharSequence prevWordForBigram, final ProximityInfo proximityInfo) { - return getSuggestedWordBuilder(wordComposer, prevWordForBigram, - proximityInfo).build(); - } - - private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) { + private static CharSequence capitalizeWord(final boolean all, final boolean first, + final CharSequence word) { if (TextUtils.isEmpty(word) || !(all || first)) return word; final int wordLength = word.length(); - final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength()); + final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); // TODO: Must pay attention to locale when changing case. if (all) { sb.append(word.toString().toUpperCase()); @@ -276,42 +227,68 @@ public class Suggest implements Dictionary.WordCallback { return sb; } - protected void addBigramToSuggestions(CharSequence bigram) { - // TODO: Try to be a little more shrewd with resource allocation. - // At the moment we copy this object because the StringBuilders are pooled (see - // StringBuilderPool.java) and when we are finished using mSuggestions and - // mBigramSuggestions we will take everything from both and insert them back in the - // pool, so we can't allow the same object to be in both lists at the same time. - final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength()); - sb.append(bigram); - mSuggestions.add(sb); + protected void addBigramToSuggestions(SuggestedWordInfo bigram) { + mSuggestions.add(bigram); + } + + private static final WordComposer sEmptyWordComposer = new WordComposer(); + public SuggestedWords getBigramPredictions(CharSequence prevWordForBigram) { + LatinImeLogger.onStartSuggestion(prevWordForBigram); + mIsFirstCharCapitalized = false; + mIsAllUpperCase = false; + mTrailingSingleQuotesCount = 0; + mSuggestions = new ArrayList<SuggestedWordInfo>(mPrefMaxSuggestions); + + // Treating USER_TYPED as UNIGRAM suggestion for logging now. + LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); + mConsideredWord = ""; + + mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); + + CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); + if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { + prevWordForBigram = lowerPrevWord; + } + for (final Dictionary dictionary : mBigramDictionaries.values()) { + dictionary.getBigrams(sEmptyWordComposer, prevWordForBigram, this); + } + // Nothing entered: return all bigrams for the previous word + int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); + for (int i = 0; i < insertCount; ++i) { + addBigramToSuggestions(mBigramSuggestions.get(i)); + } + + SuggestedWordInfo.removeDups(mSuggestions); + + return new SuggestedWords(mSuggestions, + false /* typedWordValid */, + false /* hasAutoCorrectionCandidate */, + false /* allowsToBeAutoCorrected */, + false /* isPunctuationSuggestions */, + false /* isObsoleteSuggestions */); } // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder - public SuggestedWords.Builder getSuggestedWordBuilder( + public SuggestedWords getSuggestedWords( final WordComposer wordComposer, CharSequence prevWordForBigram, - final ProximityInfo proximityInfo) { + final ProximityInfo proximityInfo, final int correctionMode) { LatinImeLogger.onStartSuggestion(prevWordForBigram); - mAutoCorrection.init(); mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); mIsAllUpperCase = wordComposer.isAllUpperCase(); - collectGarbage(mSuggestions, mPrefMaxSuggestions); - Arrays.fill(mScores, 0); - - // Save a lowercase version of the original word - String typedWord = wordComposer.getTypedWord(); - if (typedWord != null) { - // Treating USER_TYPED as UNIGRAM suggestion for logging now. - LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, - Dictionary.DataType.UNIGRAM); - } - mTypedWord = typedWord; - - if (wordComposer.size() <= 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM - || mCorrectionMode == CORRECTION_BASIC)) { + mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); + mSuggestions = new ArrayList<SuggestedWordInfo>(mPrefMaxSuggestions); + + final String typedWord = wordComposer.getTypedWord(); + final String consideredWord = mTrailingSingleQuotesCount > 0 + ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount) + : typedWord; + // Treating USER_TYPED as UNIGRAM suggestion for logging now. + LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); + mConsideredWord = consideredWord; + + if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) { // At first character typed, search only the bigrams - Arrays.fill(mBigramScores, 0); - collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS); + mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); if (!TextUtils.isEmpty(prevWordForBigram)) { CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); @@ -321,7 +298,7 @@ public class Suggest implements Dictionary.WordCallback { for (final Dictionary dictionary : mBigramDictionaries.values()) { dictionary.getBigrams(wordComposer, prevWordForBigram, this); } - if (TextUtils.isEmpty(typedWord)) { + if (TextUtils.isEmpty(consideredWord)) { // Nothing entered: return all bigrams for the previous word int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); for (int i = 0; i < insertCount; ++i) { @@ -329,15 +306,16 @@ public class Suggest implements Dictionary.WordCallback { } } else { // Word entered: return only bigrams that match the first char of the typed word - @SuppressWarnings("null") - final char currentChar = typedWord.charAt(0); + final char currentChar = consideredWord.charAt(0); // TODO: Must pay attention to locale when changing case. + // TODO: Use codepoint instead of char final char currentCharUpper = Character.toUpperCase(currentChar); int count = 0; final int bigramSuggestionSize = mBigramSuggestions.size(); for (int i = 0; i < bigramSuggestionSize; i++) { - final CharSequence bigramSuggestion = mBigramSuggestions.get(i); - final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0); + final SuggestedWordInfo bigramSuggestion = mBigramSuggestions.get(i); + final char bigramSuggestionFirstChar = + (char)bigramSuggestion.codePointAt(0); if (bigramSuggestionFirstChar == currentChar || bigramSuggestionFirstChar == currentCharUpper) { addBigramToSuggestions(bigramSuggestion); @@ -348,104 +326,157 @@ public class Suggest implements Dictionary.WordCallback { } } else if (wordComposer.size() > 1) { + final WordComposer wordComposerForLookup; + if (mTrailingSingleQuotesCount > 0) { + wordComposerForLookup = new WordComposer(wordComposer); + for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { + wordComposerForLookup.deleteLast(); + } + } else { + wordComposerForLookup = wordComposer; + } // At second character typed, search the unigrams (scores being affected by bigrams) for (final String key : mUnigramDictionaries.keySet()) { // Skip UserUnigramDictionary and WhitelistDictionary to lookup - if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) + if (key.equals(DICT_KEY_USER_HISTORY_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) continue; final Dictionary dictionary = mUnigramDictionaries.get(key); - dictionary.getWords(wordComposer, this, proximityInfo); + dictionary.getWords(wordComposerForLookup, this, proximityInfo); } } - final String typedWordString = typedWord == null ? null : typedWord.toString(); - CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized, - mWhiteListDictionary.getWhitelistedWord(typedWordString)); + final CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, + mIsFirstCharCapitalized, mWhiteListDictionary.getWhitelistedWord(consideredWord)); - mAutoCorrection.updateAutoCorrectionStatus(mUnigramDictionaries, wordComposer, - mSuggestions, mScores, typedWord, mAutoCorrectionThreshold, mCorrectionMode, - whitelistedWord); + final boolean hasAutoCorrection; + if (CORRECTION_FULL == correctionMode || CORRECTION_FULL_BIGRAM == correctionMode) { + final CharSequence autoCorrection = + AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer, + mSuggestions, consideredWord, mAutoCorrectionThreshold, + whitelistedWord); + hasAutoCorrection = (null != autoCorrection); + } else { + hasAutoCorrection = false; + } if (whitelistedWord != null) { - mSuggestions.add(0, whitelistedWord); + if (mTrailingSingleQuotesCount > 0) { + final StringBuilder sb = new StringBuilder(whitelistedWord); + for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { + sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); + } + mSuggestions.add(0, new SuggestedWordInfo( + sb.toString(), SuggestedWordInfo.MAX_SCORE)); + } else { + mSuggestions.add(0, new SuggestedWordInfo( + whitelistedWord, SuggestedWordInfo.MAX_SCORE)); + } } - if (typedWord != null) { - mSuggestions.add(0, typedWordString); - } - Utils.removeDupes(mSuggestions); + mSuggestions.add(0, new SuggestedWordInfo(typedWord, SuggestedWordInfo.MAX_SCORE)); + SuggestedWordInfo.removeDups(mSuggestions); + final ArrayList<SuggestedWordInfo> suggestionsList; if (DBG) { - double normalizedScore = mAutoCorrection.getNormalizedScore(); - ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList = - new ArrayList<SuggestedWords.SuggestedWordInfo>(); - scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false)); - for (int i = 0; i < mScores.length; ++i) { - if (normalizedScore > 0) { - final String scoreThreshold = String.format("%d (%4.2f)", mScores[i], - normalizedScore); - scoreInfoList.add( - new SuggestedWords.SuggestedWordInfo(scoreThreshold, false)); - normalizedScore = 0.0; - } else { - final String score = Integer.toString(mScores[i]); - scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(score, false)); - } - } - for (int i = mScores.length; i < mSuggestions.size(); ++i) { - scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false)); - } - return new SuggestedWords.Builder().addWords(mSuggestions, scoreInfoList); + suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, mSuggestions); + } else { + suggestionsList = mSuggestions; + } + + // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" + // but still autocorrected from - in the case the whitelist only capitalizes the word. + // The whitelist should be case-insensitive, so it's not possible to be consistent with + // a boolean flag. Right now this is handled with a slight hack in + // WhitelistDictionary#shouldForciblyAutoCorrectFrom. + final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected( + getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()); + + boolean autoCorrectionAvailable = hasAutoCorrection; + if (correctionMode == CORRECTION_FULL || correctionMode == CORRECTION_FULL_BIGRAM) { + autoCorrectionAvailable |= !allowsToBeAutoCorrected; + } + // Don't auto-correct words with multiple capital letter + autoCorrectionAvailable &= !wordComposer.isMostlyCaps(); + if (allowsToBeAutoCorrected && suggestionsList.size() > 1 && mAutoCorrectionThreshold > 0 + && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, + suggestionsList.get(1).mWord)) { + autoCorrectionAvailable = false; } - return new SuggestedWords.Builder().addWords(mSuggestions, null); + return new SuggestedWords(suggestionsList, + !allowsToBeAutoCorrected /* typedWordValid */, + autoCorrectionAvailable /* hasAutoCorrectionCandidate */, + allowsToBeAutoCorrected /* allowsToBeAutoCorrected */, + false /* isPunctuationSuggestions */, + false /* isObsoleteSuggestions */); } - public boolean hasAutoCorrection() { - return mAutoCorrection.hasAutoCorrection(); + private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( + final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { + final SuggestedWordInfo typedWordInfo = suggestions.get(0); + typedWordInfo.setDebugString("+"); + double normalizedScore = BinaryDictionary.calcNormalizedScore( + typedWord, typedWordInfo.toString(), typedWordInfo.mScore); + final int suggestionsSize = suggestions.size(); + final ArrayList<SuggestedWordInfo> suggestionsList = + new ArrayList<SuggestedWordInfo>(suggestionsSize); + suggestionsList.add(typedWordInfo); + // Note: i here is the index in mScores[], but the index in mSuggestions is one more + // than i because we added the typed word to mSuggestions without touching mScores. + for (int i = 0; i < suggestionsSize - 1; ++i) { + final SuggestedWordInfo cur = suggestions.get(i + 1); + final String scoreInfoString; + if (normalizedScore > 0) { + scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore); + normalizedScore = 0.0; + } else { + scoreInfoString = Integer.toString(cur.mScore); + } + cur.setDebugString(scoreInfoString); + suggestionsList.add(cur); + } + return suggestionsList; } + // TODO: Use codepoint instead of char @Override public boolean addWord(final char[] word, final int offset, final int length, int score, - final int dicTypeId, final Dictionary.DataType dataType) { - Dictionary.DataType dataTypeForLog = dataType; - final ArrayList<CharSequence> suggestions; - final int[] sortedScores; + final int dicTypeId, final int dataType) { + int dataTypeForLog = dataType; + final ArrayList<SuggestedWordInfo> suggestions; final int prefMaxSuggestions; - if(dataType == Dictionary.DataType.BIGRAM) { + if (dataType == Dictionary.BIGRAM) { suggestions = mBigramSuggestions; - sortedScores = mBigramScores; prefMaxSuggestions = PREF_MAX_BIGRAMS; } else { suggestions = mSuggestions; - sortedScores = mScores; prefMaxSuggestions = mPrefMaxSuggestions; } int pos = 0; // Check if it's the same word, only caps are different - if (Utils.equalsIgnoreCase(mTypedWord, word, offset, length)) { + if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) { // TODO: remove this surrounding if clause and move this logic to // getSuggestedWordBuilder. if (suggestions.size() > 0) { - final String currentHighestWord = suggestions.get(0).toString(); + final SuggestedWordInfo currentHighestWord = suggestions.get(0); // If the current highest word is also equal to typed word, we need to compare // frequency to determine the insertion position. This does not ensure strictly // correct ordering, but ensures the top score is on top which is enough for // removing duplicates correctly. - if (Utils.equalsIgnoreCase(currentHighestWord, word, offset, length) - && score <= sortedScores[0]) { + if (StringUtils.equalsIgnoreCase(currentHighestWord.mWord, word, offset, length) + && score <= currentHighestWord.mScore) { pos = 1; } } } else { - if (dataType == Dictionary.DataType.UNIGRAM) { + if (dataType == Dictionary.UNIGRAM) { // Check if the word was already added before (by bigram data) int bigramSuggestion = searchBigramSuggestion(word,offset,length); if(bigramSuggestion >= 0) { - dataTypeForLog = Dictionary.DataType.BIGRAM; + dataTypeForLog = Dictionary.BIGRAM; // turn freq from bigram into multiplier specified above - double multiplier = (((double) mBigramScores[bigramSuggestion]) + double multiplier = (((double) mBigramSuggestions.get(bigramSuggestion).mScore) / MAXIMUM_BIGRAM_FREQUENCY) * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) + BIGRAM_MULTIPLIER_MIN; @@ -459,10 +490,12 @@ public class Suggest implements Dictionary.WordCallback { } // Check the last one's score and bail - if (sortedScores[prefMaxSuggestions - 1] >= score) return true; - while (pos < prefMaxSuggestions) { - if (sortedScores[pos] < score - || (sortedScores[pos] == score && length < suggestions.get(pos).length())) { + if (suggestions.size() >= prefMaxSuggestions + && suggestions.get(prefMaxSuggestions - 1).mScore >= score) return true; + while (pos < suggestions.size()) { + final int curScore = suggestions.get(pos).mScore; + if (curScore < score + || (curScore == score && length < suggestions.get(pos).codePointCount())) { break; } pos++; @@ -472,9 +505,7 @@ public class Suggest implements Dictionary.WordCallback { return true; } - System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1); - sortedScores[pos] = score; - final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength()); + final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); // TODO: Must pay attention to locale when changing case. if (mIsAllUpperCase) { sb.append(new String(word, offset, length).toUpperCase()); @@ -486,57 +517,41 @@ public class Suggest implements Dictionary.WordCallback { } else { sb.append(word, offset, length); } - suggestions.add(pos, sb); + for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { + sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); + } + suggestions.add(pos, new SuggestedWordInfo(sb, score)); if (suggestions.size() > prefMaxSuggestions) { - final CharSequence garbage = suggestions.remove(prefMaxSuggestions); - if (garbage instanceof StringBuilder) { - StringBuilderPool.recycle((StringBuilder)garbage); - } + suggestions.remove(prefMaxSuggestions); } else { LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); } return true; } + // TODO: Use codepoint instead of char private int searchBigramSuggestion(final char[] word, final int offset, final int length) { // TODO This is almost O(n^2). Might need fix. // search whether the word appeared in bigram data int bigramSuggestSize = mBigramSuggestions.size(); - for(int i = 0; i < bigramSuggestSize; i++) { - if(mBigramSuggestions.get(i).length() == length) { + for (int i = 0; i < bigramSuggestSize; i++) { + if (mBigramSuggestions.get(i).codePointCount() == length) { boolean chk = true; - for(int j = 0; j < length; j++) { - if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { + for (int j = 0; j < length; j++) { + if (mBigramSuggestions.get(i).codePointAt(j) != word[offset+j]) { chk = false; break; } } - if(chk) return i; + if (chk) return i; } } return -1; } - private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) { - int poolSize = StringBuilderPool.getSize(); - int garbageSize = suggestions.size(); - while (poolSize < prefMaxSuggestions && garbageSize > 0) { - final CharSequence garbage = suggestions.get(garbageSize - 1); - if (garbage instanceof StringBuilder) { - StringBuilderPool.recycle((StringBuilder)garbage); - poolSize++; - } - garbageSize--; - } - if (poolSize == prefMaxSuggestions + 1) { - Log.w("Suggest", "String pool got too big: " + poolSize); - } - suggestions.clear(); - } - public void close() { - final Set<Dictionary> dictionaries = new HashSet<Dictionary>(); + final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>(); dictionaries.addAll(mUnigramDictionaries.values()); dictionaries.addAll(mBigramDictionaries.values()); for (final Dictionary dictionary : dictionaries) { @@ -544,4 +559,37 @@ public class Suggest implements Dictionary.WordCallback { } mMainDict = null; } + + // TODO: Resolve the inconsistencies between the native auto correction algorithms and + // this safety net + public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, + final CharSequence suggestion) { + // Safety net for auto correction. + // Actually if we hit this safety net, it's a bug. + // If user selected aggressive auto correction mode, there is no need to use the safety + // net. + // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, + // we should not use net because relatively edit distance can be big. + final int typedWordLength = typedWord.length(); + if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { + return false; + } + final int maxEditDistanceOfNativeDictionary = + (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; + final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString()); + if (DBG) { + Log.d(TAG, "Autocorrected edit distance = " + distance + + ", " + maxEditDistanceOfNativeDictionary); + } + if (distance > maxEditDistanceOfNativeDictionary) { + if (DBG) { + Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); + Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " + + "Turning off auto-correction."); + } + return true; + } else { + return false; + } + } } |