diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/Suggest.java | 588 |
1 files changed, 222 insertions, 366 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 336a76f4b..278c4b9ce 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -18,7 +18,6 @@ package com.android.inputmethod.latin; import android.content.Context; import android.text.TextUtils; -import android.util.Log; import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.ProximityInfo; @@ -26,6 +25,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import java.io.File; import java.util.ArrayList; +import java.util.Comparator; import java.util.HashSet; import java.util.Locale; import java.util.concurrent.ConcurrentHashMap; @@ -34,87 +34,55 @@ import java.util.concurrent.ConcurrentHashMap; * This class loads a dictionary and provides a list of suggestions for a given sequence of * characters. This includes corrections and completions. */ -public class Suggest implements Dictionary.WordCallback { +public class Suggest { public static final String TAG = Suggest.class.getSimpleName(); - public static final int APPROX_MAX_WORD_LENGTH = 32; + // Session id for + // {@link #getSuggestedWords(WordComposer,CharSequence,ProximityInfo,boolean,int)}. + public static final int SESSION_TYPING = 0; + public static final int SESSION_GESTURE = 1; + // TODO: rename this to CORRECTION_OFF public static final int CORRECTION_NONE = 0; + // TODO: rename this to CORRECTION_ON public static final int CORRECTION_FULL = 1; - public static final int CORRECTION_FULL_BIGRAM = 2; - - // It seems the following values are only used for logging. - public static final int DIC_USER_TYPED = 0; - public static final int DIC_MAIN = 1; - public static final int DIC_USER = 2; - public static final int DIC_USER_HISTORY = 3; - public static final int DIC_CONTACTS = 4; - public static final int DIC_WHITELIST = 6; - // If you add a type of dictionary, increment DIC_TYPE_LAST_ID - // TODO: this value seems unused. Remove it? - public static final int DIC_TYPE_LAST_ID = 6; - public static final String DICT_KEY_MAIN = "main"; - public static final String DICT_KEY_CONTACTS = "contacts"; - // User dictionary, the system-managed one. - public static final String DICT_KEY_USER = "user"; - // User history dictionary for the unigram map, internal to LatinIME - public static final String DICT_KEY_USER_HISTORY_UNIGRAM = "history_unigram"; - // User history dictionary for the bigram map, internal to LatinIME - public static final String DICT_KEY_USER_HISTORY_BIGRAM = "history_bigram"; - public static final String DICT_KEY_WHITELIST ="whitelist"; - private static final boolean DBG = LatinImeLogger.sDBG; + public interface SuggestInitializationListener { + public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable); + } - private boolean mHasMainDictionary; - private Dictionary mContactsDict; - private WhitelistDictionary mWhiteListDictionary; - private final ConcurrentHashMap<String, Dictionary> mUnigramDictionaries = - new ConcurrentHashMap<String, Dictionary>(); - private final ConcurrentHashMap<String, Dictionary> mBigramDictionaries = - new ConcurrentHashMap<String, Dictionary>(); + private static final boolean DBG = LatinImeLogger.sDBG; - private int mPrefMaxSuggestions = 18; + private Dictionary mMainDictionary; + private ContactsBinaryDictionary mContactsDict; + private final ConcurrentHashMap<String, Dictionary> mDictionaries = + CollectionUtils.newConcurrentHashMap(); - private static final int PREF_MAX_BIGRAMS = 60; + public static final int MAX_SUGGESTIONS = 18; private float mAutoCorrectionThreshold; - private ArrayList<SuggestedWordInfo> mSuggestions = new ArrayList<SuggestedWordInfo>(); - private ArrayList<SuggestedWordInfo> mBigramSuggestions = new ArrayList<SuggestedWordInfo>(); - private CharSequence mConsideredWord; + // Locale used for upper- and title-casing words + private final Locale mLocale; - // TODO: Remove these member variables by passing more context to addWord() callback method - private boolean mIsFirstCharCapitalized; - private boolean mIsAllUpperCase; - private int mTrailingSingleQuotesCount; - - private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; - - public Suggest(final Context context, final Locale locale) { - initAsynchronously(context, locale); + public Suggest(final Context context, final Locale locale, + final SuggestInitializationListener listener) { + initAsynchronously(context, locale, listener); + mLocale = locale; } /* package for test */ Suggest(final Context context, final File dictionary, final long startOffset, final long length, final Locale locale) { final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary, startOffset, length /* useFullEditDistance */, false, locale); - mHasMainDictionary = null != mainDict; - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict); - addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict); - initWhitelistAndAutocorrectAndPool(context, locale); + mLocale = locale; + mMainDictionary = mainDict; + addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict); } - private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { - mWhiteListDictionary = new WhitelistDictionary(context, locale); - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary); - } - - private void initAsynchronously(final Context context, final Locale locale) { - resetMainDict(context, locale); - - // TODO: read the whitelist and init the pool asynchronously too. - // initPool should be done asynchronously now that the pool is thread-safe. - initWhitelistAndAutocorrectAndPool(context, locale); + private void initAsynchronously(final Context context, final Locale locale, + final SuggestInitializationListener listener) { + resetMainDict(context, locale, listener); } private static void addOrReplaceDictionary( @@ -128,16 +96,22 @@ public class Suggest implements Dictionary.WordCallback { } } - public void resetMainDict(final Context context, final Locale locale) { - mHasMainDictionary = false; + public void resetMainDict(final Context context, final Locale locale, + final SuggestInitializationListener listener) { + mMainDictionary = null; + if (listener != null) { + listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); + } new Thread("InitializeBinaryDictionary") { @Override public void run() { final DictionaryCollection newMainDict = DictionaryFactory.createMainDictionaryFromManager(context, locale); - mHasMainDictionary = null != newMainDict && !newMainDict.isEmpty(); - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict); - addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict); + addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict); + mMainDictionary = newMainDict; + if (listener != null) { + listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); + } } }.start(); } @@ -145,27 +119,27 @@ public class Suggest implements Dictionary.WordCallback { // The main dictionary could have been loaded asynchronously. Don't cache the return value // of this method. public boolean hasMainDictionary() { - return mHasMainDictionary; + return null != mMainDictionary && mMainDictionary.isInitialized(); } - public Dictionary getContactsDictionary() { - return mContactsDict; + public Dictionary getMainDictionary() { + return mMainDictionary; } - public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { - return mUnigramDictionaries; + public ContactsBinaryDictionary getContactsDictionary() { + return mContactsDict; } - public static int getApproxMaxWordLength() { - return APPROX_MAX_WORD_LENGTH; + public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { + return mDictionaries; } /** * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted * before the main dictionary, if set. This refers to the system-managed user dictionary. */ - public void setUserDictionary(Dictionary userDictionary) { - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary); + public void setUserDictionary(UserBinaryDictionary userDictionary) { + addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary); } /** @@ -173,236 +147,193 @@ public class Suggest implements Dictionary.WordCallback { * the contacts dictionary by passing null to this method. In this case no contacts dictionary * won't be used. */ - public void setContactsDictionary(Dictionary contactsDictionary) { + public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) { mContactsDict = contactsDictionary; - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); - addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); + addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary); } - public void setUserHistoryDictionary(Dictionary userHistoryDictionary) { - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_HISTORY_UNIGRAM, - userHistoryDictionary); - addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_HISTORY_BIGRAM, - userHistoryDictionary); + public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) { + addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); } public void setAutoCorrectionThreshold(float threshold) { mAutoCorrectionThreshold = threshold; } - private static CharSequence capitalizeWord(final boolean all, final boolean first, - final CharSequence word) { - if (TextUtils.isEmpty(word) || !(all || first)) return word; - final int wordLength = word.length(); - final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); - // TODO: Must pay attention to locale when changing case. - if (all) { - sb.append(word.toString().toUpperCase()); - } else if (first) { - sb.append(Character.toUpperCase(word.charAt(0))); - if (wordLength > 1) { - sb.append(word.subSequence(1, wordLength)); - } - } - return sb; - } - - protected void addBigramToSuggestions(SuggestedWordInfo bigram) { - mSuggestions.add(bigram); - } - - private static final WordComposer sEmptyWordComposer = new WordComposer(); - public SuggestedWords getBigramPredictions(CharSequence prevWordForBigram) { + public SuggestedWords getSuggestedWords( + final WordComposer wordComposer, CharSequence prevWordForBigram, + final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId) { LatinImeLogger.onStartSuggestion(prevWordForBigram); - mIsFirstCharCapitalized = false; - mIsAllUpperCase = false; - mTrailingSingleQuotesCount = 0; - mSuggestions = new ArrayList<SuggestedWordInfo>(mPrefMaxSuggestions); - - // Treating USER_TYPED as UNIGRAM suggestion for logging now. - LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); - mConsideredWord = ""; - - mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); - - getAllBigrams(prevWordForBigram, sEmptyWordComposer); - - // Nothing entered: return all bigrams for the previous word - int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); - for (int i = 0; i < insertCount; ++i) { - addBigramToSuggestions(mBigramSuggestions.get(i)); + if (wordComposer.isBatchMode()) { + return getSuggestedWordsForBatchInput( + wordComposer, prevWordForBigram, proximityInfo, sessionId); + } else { + return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo, + isCorrectionEnabled); } - - SuggestedWordInfo.removeDups(mSuggestions); - - return new SuggestedWords(mSuggestions, - false /* typedWordValid */, - false /* hasAutoCorrectionCandidate */, - false /* allowsToBeAutoCorrected */, - false /* isPunctuationSuggestions */, - false /* isObsoleteSuggestions */, - true /* isPrediction */); } - // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder - public SuggestedWords getSuggestedWords( + // Retrieves suggestions for the typing input. + private SuggestedWords getSuggestedWordsForTypingInput( final WordComposer wordComposer, CharSequence prevWordForBigram, - final ProximityInfo proximityInfo, final int correctionMode) { - LatinImeLogger.onStartSuggestion(prevWordForBigram); - mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); - mIsAllUpperCase = wordComposer.isAllUpperCase(); - mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); - mSuggestions = new ArrayList<SuggestedWordInfo>(mPrefMaxSuggestions); + final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) { + final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); + final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, + MAX_SUGGESTIONS); final String typedWord = wordComposer.getTypedWord(); - final String consideredWord = mTrailingSingleQuotesCount > 0 - ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount) + final String consideredWord = trailingSingleQuotesCount > 0 + ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) : typedWord; - // Treating USER_TYPED as UNIGRAM suggestion for logging now. - LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); - mConsideredWord = consideredWord; - - if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) { - // At first character typed, search only the bigrams - mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); - - if (!TextUtils.isEmpty(prevWordForBigram)) { - getAllBigrams(prevWordForBigram, wordComposer); - if (TextUtils.isEmpty(consideredWord)) { - // Nothing entered: return all bigrams for the previous word - int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); - for (int i = 0; i < insertCount; ++i) { - addBigramToSuggestions(mBigramSuggestions.get(i)); - } - } else { - // Word entered: return only bigrams that match the first char of the typed word - final char currentChar = consideredWord.charAt(0); - // TODO: Must pay attention to locale when changing case. - // TODO: Use codepoint instead of char - final char currentCharUpper = Character.toUpperCase(currentChar); - int count = 0; - final int bigramSuggestionSize = mBigramSuggestions.size(); - for (int i = 0; i < bigramSuggestionSize; i++) { - final SuggestedWordInfo bigramSuggestion = mBigramSuggestions.get(i); - final char bigramSuggestionFirstChar = - (char)bigramSuggestion.codePointAt(0); - if (bigramSuggestionFirstChar == currentChar - || bigramSuggestionFirstChar == currentCharUpper) { - addBigramToSuggestions(bigramSuggestion); - if (++count > mPrefMaxSuggestions) break; - } - } - } - } + LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); - } else if (wordComposer.size() > 1) { - final WordComposer wordComposerForLookup; - if (mTrailingSingleQuotesCount > 0) { - wordComposerForLookup = new WordComposer(wordComposer); - for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { - wordComposerForLookup.deleteLast(); - } - } else { - wordComposerForLookup = wordComposer; - } - // At second character typed, search the unigrams (scores being affected by bigrams) - for (final String key : mUnigramDictionaries.keySet()) { - // Skip UserUnigramDictionary and WhitelistDictionary to lookup - if (key.equals(DICT_KEY_USER_HISTORY_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) - continue; - final Dictionary dictionary = mUnigramDictionaries.get(key); - dictionary.getWords(wordComposerForLookup, prevWordForBigram, this, proximityInfo); + final WordComposer wordComposerForLookup; + if (trailingSingleQuotesCount > 0) { + wordComposerForLookup = new WordComposer(wordComposer); + for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { + wordComposerForLookup.deleteLast(); } + } else { + wordComposerForLookup = wordComposer; } - final CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, - mIsFirstCharCapitalized, mWhiteListDictionary.getWhitelistedWord(consideredWord)); + for (final String key : mDictionaries.keySet()) { + final Dictionary dictionary = mDictionaries.get(key); + suggestionsSet.addAll(dictionary.getSuggestions( + wordComposerForLookup, prevWordForBigram, proximityInfo)); + } - final boolean hasAutoCorrection; - if (CORRECTION_FULL == correctionMode || CORRECTION_FULL_BIGRAM == correctionMode) { - final CharSequence autoCorrection = - AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer, - mSuggestions, consideredWord, mAutoCorrectionThreshold, - whitelistedWord); - hasAutoCorrection = (null != autoCorrection); + final CharSequence whitelistedWord; + if (suggestionsSet.isEmpty()) { + whitelistedWord = null; + } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) { + whitelistedWord = null; } else { + whitelistedWord = suggestionsSet.first().mWord; + } + + // The word can be auto-corrected if it has a whitelist entry that is not itself, + // or if it's a 2+ characters non-word (i.e. it's not in the dictionary). + final boolean allowsToBeAutoCorrected = (null != whitelistedWord + && !whitelistedWord.equals(consideredWord)) + || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries, + consideredWord, wordComposer.isFirstCharCapitalized())); + + final boolean hasAutoCorrection; + // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because + // any attempt to do auto-correction is already shielded with a test for this flag; at the + // same time, it feels wrong that the SuggestedWord object includes information about + // the current settings. It may also be useful to know, when the setting is off, whether + // the word *would* have been auto-corrected. + if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord() + || suggestionsSet.isEmpty() || wordComposer.hasDigits() + || wordComposer.isMostlyCaps() || wordComposer.isResumed() + || !hasMainDictionary()) { + // If we don't have a main dictionary, we never want to auto-correct. The reason for + // this is, the user may have a contact whose name happens to match a valid word in + // their language, and it will unexpectedly auto-correct. For example, if the user + // types in English with no dictionary and has a "Will" in their contact list, "will" + // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no + // auto-correct. hasAutoCorrection = false; + } else { + hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold( + suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold); } - if (whitelistedWord != null) { - if (mTrailingSingleQuotesCount > 0) { - final StringBuilder sb = new StringBuilder(whitelistedWord); - for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { - sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); - } - mSuggestions.add(0, new SuggestedWordInfo( - sb.toString(), SuggestedWordInfo.MAX_SCORE)); - } else { - mSuggestions.add(0, new SuggestedWordInfo( - whitelistedWord, SuggestedWordInfo.MAX_SCORE)); + final ArrayList<SuggestedWordInfo> suggestionsContainer = + CollectionUtils.newArrayList(suggestionsSet); + final int suggestionsCount = suggestionsContainer.size(); + final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); + final boolean isAllUpperCase = wordComposer.isAllUpperCase(); + if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { + for (int i = 0; i < suggestionsCount; ++i) { + final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); + final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( + wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, + trailingSingleQuotesCount); + suggestionsContainer.set(i, transformedWordInfo); } } - mSuggestions.add(0, new SuggestedWordInfo(typedWord, SuggestedWordInfo.MAX_SCORE)); - SuggestedWordInfo.removeDups(mSuggestions); + for (int i = 0; i < suggestionsCount; ++i) { + final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); + LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict); + } + + if (!TextUtils.isEmpty(typedWord)) { + suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, + SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, + Dictionary.TYPE_USER_TYPED)); + } + SuggestedWordInfo.removeDups(suggestionsContainer); final ArrayList<SuggestedWordInfo> suggestionsList; - if (DBG) { - suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, mSuggestions); + if (DBG && !suggestionsContainer.isEmpty()) { + suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); } else { - suggestionsList = mSuggestions; + suggestionsList = suggestionsContainer; } - // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" - // but still autocorrected from - in the case the whitelist only capitalizes the word. - // The whitelist should be case-insensitive, so it's not possible to be consistent with - // a boolean flag. Right now this is handled with a slight hack in - // WhitelistDictionary#shouldForciblyAutoCorrectFrom. - final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected( - getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()) - // If we don't have a main dictionary, we never want to auto-correct. The reason for this - // is, the user may have a contact whose name happens to match a valid word in their - // language, and it will unexpectedly auto-correct. For example, if the user types in - // English with no dictionary and has a "Will" in their contact list, "will" would - // always auto-correct to "Will" which is unwanted. Hence, no main dict => no auto-correct. - && mHasMainDictionary; - - boolean autoCorrectionAvailable = hasAutoCorrection; - if (correctionMode == CORRECTION_FULL || correctionMode == CORRECTION_FULL_BIGRAM) { - autoCorrectionAvailable |= !allowsToBeAutoCorrected; - } - // Don't auto-correct words with multiple capital letter - autoCorrectionAvailable &= !wordComposer.isMostlyCaps(); - autoCorrectionAvailable &= !wordComposer.isResumed(); - if (allowsToBeAutoCorrected && suggestionsList.size() > 1 && mAutoCorrectionThreshold > 0 - && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, - suggestionsList.get(1).mWord)) { - autoCorrectionAvailable = false; - } return new SuggestedWords(suggestionsList, + // TODO: this first argument is lying. If this is a whitelisted word which is an + // actual word, it says typedWordValid = false, which looks wrong. We should either + // rename the attribute or change the value. !allowsToBeAutoCorrected /* typedWordValid */, - autoCorrectionAvailable /* hasAutoCorrectionCandidate */, - allowsToBeAutoCorrected /* allowsToBeAutoCorrected */, + hasAutoCorrection, /* willAutoCorrect */ false /* isPunctuationSuggestions */, false /* isObsoleteSuggestions */, - false /* isPrediction */); + !wordComposer.isComposingWord() /* isPrediction */); } - /** - * Adds all bigram predictions for prevWord. Also checks the lower case version of prevWord if - * it contains any upper case characters. - */ - private void getAllBigrams(final CharSequence prevWord, final WordComposer wordComposer) { - if (StringUtils.hasUpperCase(prevWord)) { - // TODO: Must pay attention to locale when changing case. - final CharSequence lowerPrevWord = prevWord.toString().toLowerCase(); - for (final Dictionary dictionary : mBigramDictionaries.values()) { - dictionary.getBigrams(wordComposer, lowerPrevWord, this); + // Retrieves suggestions for the batch input. + private SuggestedWords getSuggestedWordsForBatchInput( + final WordComposer wordComposer, CharSequence prevWordForBigram, + final ProximityInfo proximityInfo, int sessionId) { + final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, + MAX_SUGGESTIONS); + + // At second character typed, search the unigrams (scores being affected by bigrams) + for (final String key : mDictionaries.keySet()) { + // Skip User history dictionary for lookup + // TODO: The user history dictionary should just override getSuggestionsWithSessionId + // to make sure it doesn't return anything and we should remove this test + if (key.equals(Dictionary.TYPE_USER_HISTORY)) { + continue; } + final Dictionary dictionary = mDictionaries.get(key); + suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId( + wordComposer, prevWordForBigram, proximityInfo, sessionId)); } - for (final Dictionary dictionary : mBigramDictionaries.values()) { - dictionary.getBigrams(wordComposer, prevWord, this); + + for (SuggestedWordInfo wordInfo : suggestionsSet) { + LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict); } + + final ArrayList<SuggestedWordInfo> suggestionsContainer = + CollectionUtils.newArrayList(suggestionsSet); + final int suggestionsCount = suggestionsContainer.size(); + final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock(); + final boolean isAllUpperCase = wordComposer.isAllUpperCase(); + if (isFirstCharCapitalized || isAllUpperCase) { + for (int i = 0; i < suggestionsCount; ++i) { + final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); + final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( + wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, + 0 /* trailingSingleQuotesCount */); + suggestionsContainer.set(i, transformedWordInfo); + } + } + + SuggestedWordInfo.removeDups(suggestionsContainer); + // In the batch input mode, the most relevant suggested word should act as a "typed word" + // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). + return new SuggestedWords(suggestionsContainer, + true /* typedWordValid */, + false /* willAutoCorrect */, + false /* isPunctuationSuggestions */, + false /* isObsoleteSuggestions */, + false /* isPrediction */); } private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( @@ -411,7 +342,7 @@ public class Suggest implements Dictionary.WordCallback { typedWordInfo.setDebugString("+"); final int suggestionsSize = suggestions.size(); final ArrayList<SuggestedWordInfo> suggestionsList = - new ArrayList<SuggestedWordInfo>(suggestionsSize); + CollectionUtils.newArrayList(suggestionsSize); suggestionsList.add(typedWordInfo); // Note: i here is the index in mScores[], but the index in mSuggestions is one more // than i because we added the typed word to mSuggestions without touching mScores. @@ -431,119 +362,44 @@ public class Suggest implements Dictionary.WordCallback { return suggestionsList; } - // TODO: Use codepoint instead of char - @Override - public boolean addWord(final char[] word, final int offset, final int length, int score, - final int dicTypeId, final int dataType) { - int dataTypeForLog = dataType; - final ArrayList<SuggestedWordInfo> suggestions; - final int prefMaxSuggestions; - if (dataType == Dictionary.BIGRAM) { - suggestions = mBigramSuggestions; - prefMaxSuggestions = PREF_MAX_BIGRAMS; - } else { - suggestions = mSuggestions; - prefMaxSuggestions = mPrefMaxSuggestions; + private static class SuggestedWordInfoComparator implements Comparator<SuggestedWordInfo> { + // This comparator ranks the word info with the higher frequency first. That's because + // that's the order we want our elements in. + @Override + public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { + if (o1.mScore > o2.mScore) return -1; + if (o1.mScore < o2.mScore) return 1; + if (o1.mCodePointCount < o2.mCodePointCount) return -1; + if (o1.mCodePointCount > o2.mCodePointCount) return 1; + return o1.mWord.toString().compareTo(o2.mWord.toString()); } - - int pos = 0; - - // Check if it's the same word, only caps are different - if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) { - // TODO: remove this surrounding if clause and move this logic to - // getSuggestedWordBuilder. - if (suggestions.size() > 0) { - final SuggestedWordInfo currentHighestWord = suggestions.get(0); - // If the current highest word is also equal to typed word, we need to compare - // frequency to determine the insertion position. This does not ensure strictly - // correct ordering, but ensures the top score is on top which is enough for - // removing duplicates correctly. - if (StringUtils.equalsIgnoreCase(currentHighestWord.mWord, word, offset, length) - && score <= currentHighestWord.mScore) { - pos = 1; - } - } - } else { - // Check the last one's score and bail - if (suggestions.size() >= prefMaxSuggestions - && suggestions.get(prefMaxSuggestions - 1).mScore >= score) return true; - while (pos < suggestions.size()) { - final int curScore = suggestions.get(pos).mScore; - if (curScore < score - || (curScore == score && length < suggestions.get(pos).codePointCount())) { - break; - } - pos++; - } - } - if (pos >= prefMaxSuggestions) { - return true; - } - - final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); - // TODO: Must pay attention to locale when changing case. - if (mIsAllUpperCase) { - sb.append(new String(word, offset, length).toUpperCase()); - } else if (mIsFirstCharCapitalized) { - sb.append(Character.toUpperCase(word[offset])); - if (length > 1) { - sb.append(word, offset + 1, length - 1); - } + } + private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = + new SuggestedWordInfoComparator(); + + private static SuggestedWordInfo getTransformedSuggestedWordInfo( + final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, + final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { + final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); + if (isAllUpperCase) { + sb.append(wordInfo.mWord.toString().toUpperCase(locale)); + } else if (isFirstCharCapitalized) { + sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale)); } else { - sb.append(word, offset, length); + sb.append(wordInfo.mWord); } - for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { + for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); } - suggestions.add(pos, new SuggestedWordInfo(sb, score)); - if (suggestions.size() > prefMaxSuggestions) { - suggestions.remove(prefMaxSuggestions); - } else { - LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); - } - return true; + return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict); } public void close() { - final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>(); - dictionaries.addAll(mUnigramDictionaries.values()); - dictionaries.addAll(mBigramDictionaries.values()); + final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet(); + dictionaries.addAll(mDictionaries.values()); for (final Dictionary dictionary : dictionaries) { dictionary.close(); } - mHasMainDictionary = false; - } - - // TODO: Resolve the inconsistencies between the native auto correction algorithms and - // this safety net - public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, - final CharSequence suggestion) { - // Safety net for auto correction. - // Actually if we hit this safety net, it's a bug. - // If user selected aggressive auto correction mode, there is no need to use the safety - // net. - // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, - // we should not use net because relatively edit distance can be big. - final int typedWordLength = typedWord.length(); - if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { - return false; - } - final int maxEditDistanceOfNativeDictionary = - (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; - final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString()); - if (DBG) { - Log.d(TAG, "Autocorrected edit distance = " + distance - + ", " + maxEditDistanceOfNativeDictionary); - } - if (distance > maxEditDistanceOfNativeDictionary) { - if (DBG) { - Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); - Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " - + "Turning off auto-correction."); - } - return true; - } else { - return false; - } + mMainDictionary = null; } } |