diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/Suggest.java | 424 |
1 files changed, 302 insertions, 122 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index f810eccf4..5aecc1371 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -18,6 +18,7 @@ package com.android.inputmethod.latin; import android.content.Context; import android.text.TextUtils; +import android.util.Log; import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.ProximityInfo; @@ -25,7 +26,6 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import java.io.File; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashSet; import java.util.Locale; import java.util.concurrent.ConcurrentHashMap; @@ -34,47 +34,80 @@ import java.util.concurrent.ConcurrentHashMap; * This class loads a dictionary and provides a list of suggestions for a given sequence of * characters. This includes corrections and completions. */ -public class Suggest { +public class Suggest implements Dictionary.WordCallback { public static final String TAG = Suggest.class.getSimpleName(); + public static final int APPROX_MAX_WORD_LENGTH = 32; + // TODO: rename this to CORRECTION_OFF public static final int CORRECTION_NONE = 0; // TODO: rename this to CORRECTION_ON public static final int CORRECTION_FULL = 1; + // It seems the following values are only used for logging. + public static final int DIC_USER_TYPED = 0; + public static final int DIC_MAIN = 1; + public static final int DIC_USER = 2; + public static final int DIC_USER_HISTORY = 3; + public static final int DIC_CONTACTS = 4; + public static final int DIC_WHITELIST = 6; + // If you add a type of dictionary, increment DIC_TYPE_LAST_ID + // TODO: this value seems unused. Remove it? + public static final int DIC_TYPE_LAST_ID = 6; + public static final String DICT_KEY_MAIN = "main"; + public static final String DICT_KEY_CONTACTS = "contacts"; + // User dictionary, the system-managed one. + public static final String DICT_KEY_USER = "user"; + // User history dictionary for the unigram map, internal to LatinIME + public static final String DICT_KEY_USER_HISTORY_UNIGRAM = "history_unigram"; + // User history dictionary for the bigram map, internal to LatinIME + public static final String DICT_KEY_USER_HISTORY_BIGRAM = "history_bigram"; + public static final String DICT_KEY_WHITELIST ="whitelist"; + private static final boolean DBG = LatinImeLogger.sDBG; private Dictionary mMainDictionary; private ContactsBinaryDictionary mContactsDict; private WhitelistDictionary mWhiteListDictionary; - private final ConcurrentHashMap<String, Dictionary> mDictionaries = + private final ConcurrentHashMap<String, Dictionary> mUnigramDictionaries = + new ConcurrentHashMap<String, Dictionary>(); + private final ConcurrentHashMap<String, Dictionary> mBigramDictionaries = new ConcurrentHashMap<String, Dictionary>(); public static final int MAX_SUGGESTIONS = 18; + private static final int PREF_MAX_BIGRAMS = 60; + private float mAutoCorrectionThreshold; - // Locale used for upper- and title-casing words - final private Locale mLocale; + private ArrayList<SuggestedWordInfo> mSuggestions = new ArrayList<SuggestedWordInfo>(); + private ArrayList<SuggestedWordInfo> mBigramSuggestions = new ArrayList<SuggestedWordInfo>(); + private CharSequence mConsideredWord; + + // TODO: Remove these member variables by passing more context to addWord() callback method + private boolean mIsFirstCharCapitalized; + private boolean mIsAllUpperCase; + private int mTrailingSingleQuotesCount; + + private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; public Suggest(final Context context, final Locale locale) { initAsynchronously(context, locale); - mLocale = locale; } /* package for test */ Suggest(final Context context, final File dictionary, final long startOffset, final long length, final Locale locale) { final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary, startOffset, length /* useFullEditDistance */, false, locale); - mLocale = locale; mMainDictionary = mainDict; - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict); + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict); + addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict); initWhitelistAndAutocorrectAndPool(context, locale); } private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { mWhiteListDictionary = new WhitelistDictionary(context, locale); - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_WHITELIST, mWhiteListDictionary); + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary); } private void initAsynchronously(final Context context, final Locale locale) { @@ -103,7 +136,8 @@ public class Suggest { public void run() { final DictionaryCollection newMainDict = DictionaryFactory.createMainDictionaryFromManager(context, locale); - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict); + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict); + addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict); mMainDictionary = newMainDict; } }.start(); @@ -124,7 +158,11 @@ public class Suggest { } public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { - return mDictionaries; + return mUnigramDictionaries; + } + + public static int getApproxMaxWordLength() { + return APPROX_MAX_WORD_LENGTH; } /** @@ -132,7 +170,7 @@ public class Suggest { * before the main dictionary, if set. This refers to the system-managed user dictionary. */ public void setUserDictionary(UserBinaryDictionary userDictionary) { - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary); + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary); } /** @@ -142,147 +180,213 @@ public class Suggest { */ public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) { mContactsDict = contactsDictionary; - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary); + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); + addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); } public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) { - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_HISTORY_UNIGRAM, + userHistoryDictionary); + addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_HISTORY_BIGRAM, + userHistoryDictionary); } public void setAutoCorrectionThreshold(float threshold) { mAutoCorrectionThreshold = threshold; } + private static CharSequence capitalizeWord(final boolean all, final boolean first, + final CharSequence word) { + if (TextUtils.isEmpty(word) || !(all || first)) return word; + final int wordLength = word.length(); + final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); + // TODO: Must pay attention to locale when changing case. + if (all) { + sb.append(word.toString().toUpperCase()); + } else if (first) { + sb.append(Character.toUpperCase(word.charAt(0))); + if (wordLength > 1) { + sb.append(word.subSequence(1, wordLength)); + } + } + return sb; + } + + protected void addBigramToSuggestions(SuggestedWordInfo bigram) { + mSuggestions.add(bigram); + } + + private static final WordComposer sEmptyWordComposer = new WordComposer(); + public SuggestedWords getBigramPredictions(CharSequence prevWordForBigram) { + LatinImeLogger.onStartSuggestion(prevWordForBigram); + mIsFirstCharCapitalized = false; + mIsAllUpperCase = false; + mTrailingSingleQuotesCount = 0; + mSuggestions = new ArrayList<SuggestedWordInfo>(MAX_SUGGESTIONS); + + // Treating USER_TYPED as UNIGRAM suggestion for logging now. + LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); + mConsideredWord = ""; + + mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); + + getAllBigrams(prevWordForBigram, sEmptyWordComposer); + + // Nothing entered: return all bigrams for the previous word + int insertCount = Math.min(mBigramSuggestions.size(), MAX_SUGGESTIONS); + for (int i = 0; i < insertCount; ++i) { + addBigramToSuggestions(mBigramSuggestions.get(i)); + } + + SuggestedWordInfo.removeDups(mSuggestions); + + return new SuggestedWords(mSuggestions, + false /* typedWordValid */, + false /* hasAutoCorrectionCandidate */, + false /* allowsToBeAutoCorrected */, + false /* isPunctuationSuggestions */, + false /* isObsoleteSuggestions */, + true /* isPrediction */); + } + // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder public SuggestedWords getSuggestedWords( final WordComposer wordComposer, CharSequence prevWordForBigram, - final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, - // TODO: remove isPrediction parameter. It effectively means the same thing - // as wordComposer.size() <= 1 - final boolean isPrediction) { + final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) { LatinImeLogger.onStartSuggestion(prevWordForBigram); - final boolean isFirstCharCapitalized = - !isPrediction && wordComposer.isFirstCharCapitalized(); - final boolean isAllUpperCase = !isPrediction && wordComposer.isAllUpperCase(); - final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); - final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, - MAX_SUGGESTIONS); + mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); + mIsAllUpperCase = wordComposer.isAllUpperCase(); + mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); + mSuggestions = new ArrayList<SuggestedWordInfo>(MAX_SUGGESTIONS); final String typedWord = wordComposer.getTypedWord(); - final String consideredWord = trailingSingleQuotesCount > 0 - ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) + final String consideredWord = mTrailingSingleQuotesCount > 0 + ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount) : typedWord; - LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); + // Treating USER_TYPED as UNIGRAM suggestion for logging now. + LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); + mConsideredWord = consideredWord; if (wordComposer.size() <= 1 && isCorrectionEnabled) { // At first character typed, search only the bigrams + mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); + if (!TextUtils.isEmpty(prevWordForBigram)) { for (final String key : mDictionaries.keySet()) { final Dictionary dictionary = mDictionaries.get(key); suggestionsSet.addAll(dictionary.getBigrams(wordComposer, prevWordForBigram)); } } + } else if (wordComposer.size() > 1) { final WordComposer wordComposerForLookup; - if (trailingSingleQuotesCount > 0) { + if (mTrailingSingleQuotesCount > 0) { wordComposerForLookup = new WordComposer(wordComposer); - for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { + for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { wordComposerForLookup.deleteLast(); } } else { wordComposerForLookup = wordComposer; } // At second character typed, search the unigrams (scores being affected by bigrams) - for (final String key : mDictionaries.keySet()) { + for (final String key : mUnigramDictionaries.keySet()) { // Skip UserUnigramDictionary and WhitelistDictionary to lookup - if (key.equals(Dictionary.TYPE_USER_HISTORY) - || key.equals(Dictionary.TYPE_WHITELIST)) + if (key.equals(DICT_KEY_USER_HISTORY_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) continue; - final Dictionary dictionary = mDictionaries.get(key); - suggestionsSet.addAll(dictionary.getWords( - wordComposerForLookup, prevWordForBigram, proximityInfo)); + final Dictionary dictionary = mUnigramDictionaries.get(key); + dictionary.getWords(wordComposerForLookup, prevWordForBigram, this, proximityInfo); } } - // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" - // but still autocorrected from - in the case the whitelist only capitalizes the word. - // The whitelist should be case-insensitive, so it's not possible to be consistent with - // a boolean flag. Right now this is handled with a slight hack in - // WhitelistDictionary#shouldForciblyAutoCorrectFrom. - final boolean allowsToBeAutoCorrected = AutoCorrection.isWhitelistedOrNotAWord( - mDictionaries, consideredWord, wordComposer.isFirstCharCapitalized()); - - final CharSequence whitelistedWord = - mWhiteListDictionary.getWhitelistedWord(consideredWord); + final CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, + mIsFirstCharCapitalized, mWhiteListDictionary.getWhitelistedWord(consideredWord)); final boolean hasAutoCorrection; - if (!isCorrectionEnabled || !allowsToBeAutoCorrected || wordComposer.isMostlyCaps() - || wordComposer.isResumed() || !hasMainDictionary()) { - // If we don't have a main dictionary, we never want to auto-correct. The reason for - // this is, the user may have a contact whose name happens to match a valid word in - // their language, and it will unexpectedly auto-correct. For example, if the user - // types in English with no dictionary and has a "Will" in their contact list, "will" - // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no - // auto-correct. - hasAutoCorrection = false; - } else if (null != whitelistedWord) { - hasAutoCorrection = true; - } else if (suggestionsSet.isEmpty()) { - hasAutoCorrection = false; - } else if (AutoCorrection.suggestionExceedsAutoCorrectionThreshold(suggestionsSet.first(), - consideredWord, mAutoCorrectionThreshold)) { - hasAutoCorrection = true; + if (isCorrectionEnabled) { + final CharSequence autoCorrection = + AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer, + mSuggestions, consideredWord, mAutoCorrectionThreshold, + whitelistedWord); + hasAutoCorrection = (null != autoCorrection); } else { hasAutoCorrection = false; } if (whitelistedWord != null) { - suggestionsSet.add(new SuggestedWordInfo(whitelistedWord, - SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST, - Dictionary.TYPE_WHITELIST)); - } - - final ArrayList<SuggestedWordInfo> suggestionsContainer = - new ArrayList<SuggestedWordInfo>(suggestionsSet); - final int suggestionsCount = suggestionsContainer.size(); - if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { - for (int i = 0; i < suggestionsCount; ++i) { - final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); - final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( - wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, - trailingSingleQuotesCount); - suggestionsContainer.set(i, transformedWordInfo); + if (mTrailingSingleQuotesCount > 0) { + final StringBuilder sb = new StringBuilder(whitelistedWord); + for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { + sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); + } + mSuggestions.add(0, new SuggestedWordInfo(sb.toString(), + SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST)); + } else { + mSuggestions.add(0, new SuggestedWordInfo(whitelistedWord, + SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST)); } } - for (int i = 0; i < suggestionsCount; ++i) { - final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); - LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict); - } - - if (!TextUtils.isEmpty(typedWord)) { - suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, - SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, - Dictionary.TYPE_USER_TYPED)); - } - SuggestedWordInfo.removeDups(suggestionsContainer); + mSuggestions.add(0, new SuggestedWordInfo(typedWord, SuggestedWordInfo.MAX_SCORE, + SuggestedWordInfo.KIND_TYPED)); + SuggestedWordInfo.removeDups(mSuggestions); final ArrayList<SuggestedWordInfo> suggestionsList; - if (DBG && !suggestionsContainer.isEmpty()) { - suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); + if (DBG) { + suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, mSuggestions); } else { - suggestionsList = suggestionsContainer; + suggestionsList = mSuggestions; } + // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" + // but still autocorrected from - in the case the whitelist only capitalizes the word. + // The whitelist should be case-insensitive, so it's not possible to be consistent with + // a boolean flag. Right now this is handled with a slight hack in + // WhitelistDictionary#shouldForciblyAutoCorrectFrom. + final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected( + getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()) + // If we don't have a main dictionary, we never want to auto-correct. The reason for this + // is, the user may have a contact whose name happens to match a valid word in their + // language, and it will unexpectedly auto-correct. For example, if the user types in + // English with no dictionary and has a "Will" in their contact list, "will" would + // always auto-correct to "Will" which is unwanted. Hence, no main dict => no auto-correct. + && hasMainDictionary(); + + boolean autoCorrectionAvailable = hasAutoCorrection; + if (isCorrectionEnabled) { + autoCorrectionAvailable |= !allowsToBeAutoCorrected; + } + // Don't auto-correct words with multiple capital letter + autoCorrectionAvailable &= !wordComposer.isMostlyCaps(); + autoCorrectionAvailable &= !wordComposer.isResumed(); + if (allowsToBeAutoCorrected && suggestionsList.size() > 1 && mAutoCorrectionThreshold > 0 + && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, + suggestionsList.get(1).mWord)) { + autoCorrectionAvailable = false; + } return new SuggestedWords(suggestionsList, - // TODO: this first argument is lying. If this is a whitelisted word which is an - // actual word, it says typedWordValid = false, which looks wrong. We should either - // rename the attribute or change the value. - !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */, - !isPrediction && hasAutoCorrection, /* willAutoCorrect */ + !allowsToBeAutoCorrected /* typedWordValid */, + autoCorrectionAvailable /* hasAutoCorrectionCandidate */, + allowsToBeAutoCorrected /* allowsToBeAutoCorrected */, false /* isPunctuationSuggestions */, false /* isObsoleteSuggestions */, - isPrediction); + false /* isPrediction */); + } + + /** + * Adds all bigram predictions for prevWord. Also checks the lower case version of prevWord if + * it contains any upper case characters. + */ + private void getAllBigrams(final CharSequence prevWord, final WordComposer wordComposer) { + if (StringUtils.hasUpperCase(prevWord)) { + // TODO: Must pay attention to locale when changing case. + final CharSequence lowerPrevWord = prevWord.toString().toLowerCase(); + for (final Dictionary dictionary : mBigramDictionaries.values()) { + dictionary.getBigrams(wordComposer, lowerPrevWord, this); + } + } + for (final Dictionary dictionary : mBigramDictionaries.values()) { + dictionary.getBigrams(wordComposer, prevWord, this); + } } private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( @@ -311,44 +415,120 @@ public class Suggest { return suggestionsList; } - private static class SuggestedWordInfoComparator implements Comparator<SuggestedWordInfo> { - // This comparator ranks the word info with the higher frequency first. That's because - // that's the order we want our elements in. - @Override - public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { - if (o1.mScore > o2.mScore) return -1; - if (o1.mScore < o2.mScore) return 1; - if (o1.mCodePointCount < o2.mCodePointCount) return -1; - if (o1.mCodePointCount > o2.mCodePointCount) return 1; - return o1.mWord.toString().compareTo(o2.mWord.toString()); + // TODO: Use codepoint instead of char + @Override + public boolean addWord(final char[] word, final int offset, final int length, int score, + final int dicTypeId, final int dataType) { + int dataTypeForLog = dataType; + final ArrayList<SuggestedWordInfo> suggestions; + final int prefMaxSuggestions; + if (dataType == Dictionary.BIGRAM) { + suggestions = mBigramSuggestions; + prefMaxSuggestions = PREF_MAX_BIGRAMS; + } else { + suggestions = mSuggestions; + prefMaxSuggestions = MAX_SUGGESTIONS; } - } - private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = - new SuggestedWordInfoComparator(); - - private static SuggestedWordInfo getTransformedSuggestedWordInfo( - final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, - final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { - final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); - if (isAllUpperCase) { - sb.append(wordInfo.mWord.toString().toUpperCase(locale)); - } else if (isFirstCharCapitalized) { - sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale)); + + int pos = 0; + + // Check if it's the same word, only caps are different + if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) { + // TODO: remove this surrounding if clause and move this logic to + // getSuggestedWordBuilder. + if (suggestions.size() > 0) { + final SuggestedWordInfo currentHighestWord = suggestions.get(0); + // If the current highest word is also equal to typed word, we need to compare + // frequency to determine the insertion position. This does not ensure strictly + // correct ordering, but ensures the top score is on top which is enough for + // removing duplicates correctly. + if (StringUtils.equalsIgnoreCase(currentHighestWord.mWord, word, offset, length) + && score <= currentHighestWord.mScore) { + pos = 1; + } + } + } else { + // Check the last one's score and bail + if (suggestions.size() >= prefMaxSuggestions + && suggestions.get(prefMaxSuggestions - 1).mScore >= score) return true; + while (pos < suggestions.size()) { + final int curScore = suggestions.get(pos).mScore; + if (curScore < score + || (curScore == score && length < suggestions.get(pos).codePointCount())) { + break; + } + pos++; + } + } + if (pos >= prefMaxSuggestions) { + return true; + } + + final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); + // TODO: Must pay attention to locale when changing case. + if (mIsAllUpperCase) { + sb.append(new String(word, offset, length).toUpperCase()); + } else if (mIsFirstCharCapitalized) { + sb.append(Character.toUpperCase(word[offset])); + if (length > 1) { + sb.append(word, offset + 1, length - 1); + } } else { - sb.append(wordInfo.mWord); + sb.append(word, offset, length); } - for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { + for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); } - return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict); + // TODO: figure out what type of suggestion this is + suggestions.add(pos, new SuggestedWordInfo(sb, score, SuggestedWordInfo.KIND_CORRECTION)); + if (suggestions.size() > prefMaxSuggestions) { + suggestions.remove(prefMaxSuggestions); + } else { + LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); + } + return true; } public void close() { final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>(); - dictionaries.addAll(mDictionaries.values()); + dictionaries.addAll(mUnigramDictionaries.values()); + dictionaries.addAll(mBigramDictionaries.values()); for (final Dictionary dictionary : dictionaries) { dictionary.close(); } mMainDictionary = null; } + + // TODO: Resolve the inconsistencies between the native auto correction algorithms and + // this safety net + public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, + final CharSequence suggestion) { + // Safety net for auto correction. + // Actually if we hit this safety net, it's a bug. + // If user selected aggressive auto correction mode, there is no need to use the safety + // net. + // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, + // we should not use net because relatively edit distance can be big. + final int typedWordLength = typedWord.length(); + if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { + return false; + } + final int maxEditDistanceOfNativeDictionary = + (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; + final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString()); + if (DBG) { + Log.d(TAG, "Autocorrected edit distance = " + distance + + ", " + maxEditDistanceOfNativeDictionary); + } + if (distance > maxEditDistanceOfNativeDictionary) { + if (DBG) { + Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); + Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " + + "Turning off auto-correction."); + } + return true; + } else { + return false; + } + } } |