1 files changed, 434 insertions, 0 deletions
diff --git a/java/src/org/kelar/inputmethod/latin/Suggest.java b/java/src/org/kelar/inputmethod/latin/Suggest.java
new file mode 100644
index 000000000..7023b4db3
--- /dev/null
+++ b/java/src/org/kelar/inputmethod/latin/Suggest.java
@@ -0,0 +1,434 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.kelar.inputmethod.latin;
+
+import android.text.TextUtils;
+
+import static org.kelar.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION;
+import static org.kelar.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION;
+
+import org.kelar.inputmethod.keyboard.Keyboard;
+import org.kelar.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import org.kelar.inputmethod.latin.common.Constants;
+import org.kelar.inputmethod.latin.common.StringUtils;
+import org.kelar.inputmethod.latin.define.DebugFlags;
+import org.kelar.inputmethod.latin.settings.SettingsValuesForSuggestion;
+import org.kelar.inputmethod.latin.utils.AutoCorrectionUtils;
+import org.kelar.inputmethod.latin.utils.BinaryDictionaryUtils;
+import org.kelar.inputmethod.latin.utils.SuggestionResults;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Locale;
+
+import javax.annotation.Nonnull;
+
+/**
+ * This class loads a dictionary and provides a list of suggestions for a given sequence of
+ * characters. This includes corrections and completions.
+ */
+public final class Suggest {
+    public static final String TAG = Suggest.class.getSimpleName();
+
+    // Session id for
+    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
+    // We are sharing the same ID between typing and gesture to save RAM footprint.
+    public static final int SESSION_ID_TYPING = 0;
+    public static final int SESSION_ID_GESTURE = 0;
+
+    // Close to -2**31
+    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
+
+    private static final boolean DBG = DebugFlags.DEBUG_ENABLED;
+    private final DictionaryFacilitator mDictionaryFacilitator;
+
+    private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12;
+    private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength =
+            new HashMap<>();
+    static {
+        // TODO: should we add Finnish here?
+        // TODO: This should not be hardcoded here but be written in the dictionary header
+        sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(),
+                MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN);
+    }
+
+    private float mAutoCorrectionThreshold;
+    private float mPlausibilityThreshold;
+
+    public Suggest(final DictionaryFacilitator dictionaryFacilitator) {
+        mDictionaryFacilitator = dictionaryFacilitator;
+    }
+
+    /**
+     * Set the normalized-score threshold for a suggestion to be considered strong enough that we
+     * will auto-correct to this.
+     * @param threshold the threshold
+     */
+    public void setAutoCorrectionThreshold(final float threshold) {
+        mAutoCorrectionThreshold = threshold;
+    }
+
+    /**
+     * Set the normalized-score threshold for what we consider a "plausible" suggestion, in
+     * the same dimension as the auto-correction threshold.
+     * @param threshold the threshold
+     */
+    public void setPlausibilityThreshold(final float threshold) {
+        mPlausibilityThreshold = threshold;
+    }
+
+    public interface OnGetSuggestedWordsCallback {
+        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
+    }
+
+    public void getSuggestedWords(final WordComposer wordComposer,
+            final NgramContext ngramContext, final Keyboard keyboard,
+            final SettingsValuesForSuggestion settingsValuesForSuggestion,
+            final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber,
+            final OnGetSuggestedWordsCallback callback) {
+        if (wordComposer.isBatchMode()) {
+            getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard,
+                    settingsValuesForSuggestion, inputStyle, sequenceNumber, callback);
+        } else {
+            getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard,
+                    settingsValuesForSuggestion, inputStyle, isCorrectionEnabled,
+                    sequenceNumber, callback);
+        }
+    }
+
+    private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList(
+            final WordComposer wordComposer, final SuggestionResults results,
+            final int trailingSingleQuotesCount, final Locale defaultLocale) {
+        final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase()
+                && !wordComposer.isResumed();
+        final boolean isOnlyFirstCharCapitalized =
+                wordComposer.isOrWillBeOnlyFirstCharCapitalized();
+
+        final ArrayList<SuggestedWordInfo> suggestionsContainer = new ArrayList<>(results);
+        final int suggestionsCount = suggestionsContainer.size();
+        if (isOnlyFirstCharCapitalized || shouldMakeSuggestionsAllUpperCase
+                || 0 != trailingSingleQuotesCount) {
+            for (int i = 0; i < suggestionsCount; ++i) {
+                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
+                final Locale wordLocale = wordInfo.mSourceDict.mLocale;
+                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
+                        wordInfo, null == wordLocale ? defaultLocale : wordLocale,
+                        shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized,
+                        trailingSingleQuotesCount);
+                suggestionsContainer.set(i, transformedWordInfo);
+            }
+        }
+        return suggestionsContainer;
+    }
+
+    private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
+            @Nonnull final ArrayList<SuggestedWordInfo> suggestions) {
+        if (suggestions.isEmpty()) {
+            return null;
+        }
+        final SuggestedWordInfo firstSuggestedWordInfo = suggestions.get(0);
+        if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
+            return null;
+        }
+        return firstSuggestedWordInfo;
+    }
+
+    // Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
+    // and calls the callback function with the suggestions.
+    private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer,
+            final NgramContext ngramContext, final Keyboard keyboard,
+            final SettingsValuesForSuggestion settingsValuesForSuggestion,
+            final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled,
+            final int sequenceNumber, final OnGetSuggestedWordsCallback callback) {
+        final String typedWordString = wordComposer.getTypedWord();
+        final int trailingSingleQuotesCount =
+                StringUtils.getTrailingSingleQuotesCount(typedWordString);
+        final String consideredWord = trailingSingleQuotesCount > 0
+                ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
+                : typedWordString;
+
+        final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
+                wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
+                settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction);
+        final Locale locale = mDictionaryFacilitator.getLocale();
+        final ArrayList<SuggestedWordInfo> suggestionsContainer =
+                getTransformedSuggestedWordInfoList(wordComposer, suggestionResults,
+                        trailingSingleQuotesCount, locale);
+
+        boolean foundInDictionary = false;
+        Dictionary sourceDictionaryOfRemovedWord = null;
+        for (final SuggestedWordInfo info : suggestionsContainer) {
+            // Search for the best dictionary, defined as the first one with the highest match
+            // quality we can find.
+            if (!foundInDictionary && typedWordString.equals(info.mWord)) {
+                // Use this source if the old match had lower quality than this match
+                sourceDictionaryOfRemovedWord = info.mSourceDict;
+                foundInDictionary = true;
+                break;
+            }
+        }
+
+        final int firstOcurrenceOfTypedWordInSuggestions =
+                SuggestedWordInfo.removeDups(typedWordString, suggestionsContainer);
+
+        final SuggestedWordInfo whitelistedWordInfo =
+                getWhitelistedWordInfoOrNull(suggestionsContainer);
+        final String whitelistedWord = whitelistedWordInfo == null
+                ? null : whitelistedWordInfo.mWord;
+        final boolean resultsArePredictions = !wordComposer.isComposingWord();
+
+        // We allow auto-correction if whitelisting is not required or the word is whitelisted,
+        // or if the word had more than one char and was not suggested.
+        final boolean allowsToBeAutoCorrected =
+                (SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null)
+                || (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null));
+
+        final boolean hasAutoCorrection;
+        // If correction is not enabled, we never auto-correct. This is for example for when
+        // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
+        if (!isCorrectionEnabled
+                // If the word does not allow to be auto-corrected, then we don't auto-correct.
+                || !allowsToBeAutoCorrected
+                // If we are doing prediction, then we never auto-correct of course
+                || resultsArePredictions
+                // If we don't have suggestion results, we can't evaluate the first suggestion
+                // for auto-correction
+                || suggestionResults.isEmpty()
+                // If the word has digits, we never auto-correct because it's likely the word
+                // was type with a lot of care
+                || wordComposer.hasDigits()
+                // If the word is mostly caps, we never auto-correct because this is almost
+                // certainly intentional (and careful input)
+                || wordComposer.isMostlyCaps()
+                // We never auto-correct when suggestions are resumed because it would be unexpected
+                || wordComposer.isResumed()
+                // If we don't have a main dictionary, we never want to auto-correct. The reason
+                // for this is, the user may have a contact whose name happens to match a valid
+                // word in their language, and it will unexpectedly auto-correct. For example, if
+                // the user types in English with no dictionary and has a "Will" in their contact
+                // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
+                // main dict => no auto-correct. Also, it would probably get obnoxious quickly.
+                // TODO: now that we have personalization, we may want to re-evaluate this decision
+                || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
+                // If the first suggestion is a shortcut we never auto-correct to it, regardless
+                // of how strong it is (allowlist entries are not KIND_SHORTCUT but KIND_WHITELIST).
+                // TODO: we may want to have shortcut-only entries auto-correct in the future.
+                || suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
+            hasAutoCorrection = false;
+        } else {
+            final SuggestedWordInfo firstSuggestion = suggestionResults.first();
+            if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
+                    && firstOcurrenceOfTypedWordInSuggestions != 0) {
+                hasAutoCorrection = true;
+            } else if (!AutoCorrectionUtils.suggestionExceedsThreshold(
+                    firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
+                // Score is too low for autocorrect
+                hasAutoCorrection = false;
+            } else {
+                // We have a high score, so we need to check if this suggestion is in the correct
+                // form to allow auto-correcting to it in this language. For details of how this
+                // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
+                // TODO: this should not have its own logic here but be handled by the dictionary.
+                hasAutoCorrection = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
+            }
+        }
+
+        final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
+                "" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE,
+                SuggestedWordInfo.KIND_TYPED,
+                null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED
+                        : sourceDictionaryOfRemovedWord,
+                SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
+                SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
+        if (!TextUtils.isEmpty(typedWordString)) {
+            suggestionsContainer.add(0, typedWordInfo);
+        }
+
+        final ArrayList<SuggestedWordInfo> suggestionsList;
+        if (DBG && !suggestionsContainer.isEmpty()) {
+            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString,
+                    suggestionsContainer);
+        } else {
+            suggestionsList = suggestionsContainer;
+        }
+
+        final int inputStyle;
+        if (resultsArePredictions) {
+            inputStyle = suggestionResults.mIsBeginningOfSentence
+                    ? SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION
+                    : SuggestedWords.INPUT_STYLE_PREDICTION;
+        } else {
+            inputStyle = inputStyleIfNotPrediction;
+        }
+
+        final boolean isTypedWordValid = firstOcurrenceOfTypedWordInSuggestions > -1
+                || (!resultsArePredictions && !allowsToBeAutoCorrected);
+        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
+                suggestionResults.mRawSuggestions, typedWordInfo,
+                isTypedWordValid,
+                hasAutoCorrection /* willAutoCorrect */,
+                false /* isObsoleteSuggestions */, inputStyle, sequenceNumber));
+    }
+
+    // Retrieves suggestions for the batch input
+    // and calls the callback function with the suggestions.
+    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
+            final NgramContext ngramContext, final Keyboard keyboard,
+            final SettingsValuesForSuggestion settingsValuesForSuggestion,
+            final int inputStyle, final int sequenceNumber,
+            final OnGetSuggestedWordsCallback callback) {
+        final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
+                wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
+                settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle);
+        // For transforming words that don't come from a dictionary, because it's our best bet
+        final Locale locale = mDictionaryFacilitator.getLocale();
+        final ArrayList<SuggestedWordInfo> suggestionsContainer =
+                new ArrayList<>(suggestionResults);
+        final int suggestionsCount = suggestionsContainer.size();
+        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
+        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
+        if (isFirstCharCapitalized || isAllUpperCase) {
+            for (int i = 0; i < suggestionsCount; ++i) {
+                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
+                final Locale wordlocale = wordInfo.mSourceDict.mLocale;
+                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
+                        wordInfo, null == wordlocale ? locale : wordlocale, isAllUpperCase,
+                        isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */);
+                suggestionsContainer.set(i, transformedWordInfo);
+            }
+        }
+
+        if (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION
+                && suggestionsContainer.size() > 1
+                && TextUtils.equals(suggestionsContainer.get(0).mWord,
+                   wordComposer.getRejectedBatchModeSuggestion())) {
+            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
+            suggestionsContainer.add(1, rejected);
+        }
+        SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer);
+
+        // For some reason some suggestions with MIN_VALUE are making their way here.
+        // TODO: Find a more robust way to detect distracters.
+        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
+            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
+                suggestionsContainer.remove(i);
+            }
+        }
+
+        // In the batch input mode, the most relevant suggested word should act as a "typed word"
+        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
+        // Note that because this method is never used to get predictions, there is no need to
+        // modify inputType such in getSuggestedWordsForNonBatchInput.
+        final SuggestedWordInfo pseudoTypedWordInfo = suggestionsContainer.isEmpty() ? null
+                : suggestionsContainer.get(0);
+
+        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
+                suggestionResults.mRawSuggestions,
+                pseudoTypedWordInfo,
+                true /* typedWordValid */,
+                false /* willAutoCorrect */,
+                false /* isObsoleteSuggestions */,
+                inputStyle, sequenceNumber));
+    }
+
+    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
+            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
+        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
+        typedWordInfo.setDebugString("+");
+        final int suggestionsSize = suggestions.size();
+        final ArrayList<SuggestedWordInfo> suggestionsList = new ArrayList<>(suggestionsSize);
+        suggestionsList.add(typedWordInfo);
+        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
+        // than i because we added the typed word to mSuggestions without touching mScores.
+        for (int i = 0; i < suggestionsSize - 1; ++i) {
+            final SuggestedWordInfo cur = suggestions.get(i + 1);
+            final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
+                    typedWord, cur.toString(), cur.mScore);
+            final String scoreInfoString;
+            if (normalizedScore > 0) {
+                scoreInfoString = String.format(
+                        Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore,
+                        cur.mSourceDict.mDictType);
+            } else {
+                scoreInfoString = Integer.toString(cur.mScore);
+            }
+            cur.setDebugString(scoreInfoString);
+            suggestionsList.add(cur);
+        }
+        return suggestionsList;
+    }
+
+    /**
+     * Computes whether this suggestion should be blocked or not in this language
+     *
+     * This function implements a filter that avoids auto-correcting to suggestions that contain
+     * spaces that are above a certain language-dependent character limit. In languages like German
+     * where it's possible to concatenate many words, it often happens our dictionary does not
+     * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain
+     * one or several spaces. Ideally we should understand what the user wants and display useful
+     * suggestions by improving the dictionary and possibly having some specific logic. Until
+     * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell
+     * whether a suggestion is useful or not. So at least for the time being we block
+     * auto-correction when the suggestion is long and contains a space, which should avoid the
+     * worst damage.
+     * This function is implementing that filter. If the language enforces no such limit, then it
+     * always returns true. If the suggestion contains no space, it also returns true. Otherwise,
+     * it checks the length against the language-specific limit.
+     *
+     * @param info the suggestion info
+     * @return whether it's fine to auto-correct to this.
+     */
+    private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) {
+        final Locale locale = info.mSourceDict.mLocale;
+        if (null == locale) {
+            return true;
+        }
+        final Integer maximumLengthForThisLanguage =
+                sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage());
+        if (null == maximumLengthForThisLanguage) {
+            // This language does not enforce a maximum length to auto-correction
+            return true;
+        }
+        return info.mWord.length() <= maximumLengthForThisLanguage
+                || -1 == info.mWord.indexOf(Constants.CODE_SPACE);
+    }
+
+    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
+            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
+            final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) {
+        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
+        if (isAllUpperCase) {
+            sb.append(wordInfo.mWord.toUpperCase(locale));
+        } else if (isOnlyFirstCharCapitalized) {
+            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
+        } else {
+            sb.append(wordInfo.mWord);
+        }
+        // Appending quotes is here to help people quote words. However, it's not helpful
+        // when they type words with quotes toward the end like "it's" or "didn't", where
+        // it's more likely the user missed the last character (or didn't type it yet).
+        final int quotesToAppend = trailingSingleQuotesCount
+                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
+        for (int i = quotesToAppend - 1; i >= 0; --i) {
+            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
+        }
+        return new SuggestedWordInfo(sb.toString(), wordInfo.mPrevWordsContext,
+                wordInfo.mScore, wordInfo.mKindAndFlags,
+                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
+                wordInfo.mAutoCommitFirstWordConfidence);
+    }
+}