1 files changed, 229 insertions, 58 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index b03818c1d..0bf0f687a 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -20,16 +20,21 @@ import android.text.TextUtils;
 
 import com.android.inputmethod.keyboard.ProximityInfo;
 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.common.Constants;
+import com.android.inputmethod.latin.common.StringUtils;
 import com.android.inputmethod.latin.define.DebugFlags;
 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
 import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
-import com.android.inputmethod.latin.utils.StringUtils;
 import com.android.inputmethod.latin.utils.SuggestionResults;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Locale;
 
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
 /**
  * This class loads a dictionary and provides a list of suggestions for a given sequence of
  * characters. This includes corrections and completions.
@@ -49,34 +54,55 @@ public final class Suggest {
     private static final boolean DBG = DebugFlags.DEBUG_ENABLED;
     private final DictionaryFacilitator mDictionaryFacilitator;
 
+    private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12;
+    private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength =
+            new HashMap<>();
+    static {
+        // TODO: should we add Finnish here?
+        // TODO: This should not be hardcoded here but be written in the dictionary header
+        sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(),
+                MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN);
+    }
+
     private float mAutoCorrectionThreshold;
+    private float mPlausibilityThreshold;
 
     public Suggest(final DictionaryFacilitator dictionaryFacilitator) {
         mDictionaryFacilitator = dictionaryFacilitator;
     }
 
-    public Locale getLocale() {
-        return mDictionaryFacilitator.getLocale();
-    }
-
+    /**
+     * Set the normalized-score threshold for a suggestion to be considered strong enough that we
+     * will auto-correct to this.
+     * @param threshold the threshold
+     */
     public void setAutoCorrectionThreshold(final float threshold) {
         mAutoCorrectionThreshold = threshold;
     }
 
+    /**
+     * Set the normalized-score threshold for what we consider a "plausible" suggestion, in
+     * the same dimension as the auto-correction threshold.
+     * @param threshold the threshold
+     */
+    public void setPlausibilityThreshold(final float threshold) {
+        mPlausibilityThreshold = threshold;
+    }
+
     public interface OnGetSuggestedWordsCallback {
         public void onGetSuggestedWords(final SuggestedWords suggestedWords);
     }
 
     public void getSuggestedWords(final WordComposer wordComposer,
-            final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
+            final NgramContext ngramContext, final ProximityInfo proximityInfo,
             final SettingsValuesForSuggestion settingsValuesForSuggestion,
             final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber,
             final OnGetSuggestedWordsCallback callback) {
         if (wordComposer.isBatchMode()) {
-            getSuggestedWordsForBatchInput(wordComposer, prevWordsInfo, proximityInfo,
+            getSuggestedWordsForBatchInput(wordComposer, ngramContext, proximityInfo,
                     settingsValuesForSuggestion, inputStyle, sequenceNumber, callback);
         } else {
-            getSuggestedWordsForNonBatchInput(wordComposer, prevWordsInfo, proximityInfo,
+            getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, proximityInfo,
                     settingsValuesForSuggestion, inputStyle, isCorrectionEnabled,
                     sequenceNumber, callback);
         }
@@ -84,7 +110,7 @@ public final class Suggest {
 
     private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList(
             final WordComposer wordComposer, final SuggestionResults results,
-            final int trailingSingleQuotesCount) {
+            final int trailingSingleQuotesCount, final Locale defaultLocale) {
         final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase()
                 && !wordComposer.isResumed();
         final boolean isOnlyFirstCharCapitalized =
@@ -96,16 +122,19 @@ public final class Suggest {
                 || 0 != trailingSingleQuotesCount) {
             for (int i = 0; i < suggestionsCount; ++i) {
                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
+                final Locale wordLocale = wordInfo.mSourceDict.mLocale;
                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
-                        wordInfo, results.mLocale, shouldMakeSuggestionsAllUpperCase,
-                        isOnlyFirstCharCapitalized, trailingSingleQuotesCount);
+                        wordInfo, null == wordLocale ? defaultLocale : wordLocale,
+                        shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized,
+                        trailingSingleQuotesCount);
                 suggestionsContainer.set(i, transformedWordInfo);
             }
         }
         return suggestionsContainer;
     }
 
-    private static String getWhitelistedWordOrNull(final ArrayList<SuggestedWordInfo> suggestions) {
+    private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
+            @Nonnull final ArrayList<SuggestedWordInfo> suggestions) {
         if (suggestions.isEmpty()) {
             return null;
         }
@@ -113,75 +142,161 @@ public final class Suggest {
         if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
             return null;
         }
-        return firstSuggestedWordInfo.mWord;
+        return firstSuggestedWordInfo;
     }
 
+    // Quality constants for dictionary match
+    // In increasing order of quality
+    // This source dictionary does not match the typed word.
+    private static final int QUALITY_NO_MATCH = 0;
+    // This source dictionary has a null locale, and the preferred locale is also null.
+    private static final int QUALITY_MATCH_NULL = 1;
+    // This source dictionary has a non-null locale different from the preferred locale. The
+    // preferred locale may be null : this is still better than MATCH_NULL.
+    private static final int QUALITY_MATCH_OTHER_LOCALE = 2;
+    // This source dictionary matches the preferred locale.
+    private static final int QUALITY_MATCH_PREFERRED_LOCALE = 3;
+
     // Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
     // and calls the callback function with the suggestions.
     private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer,
-            final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
+            final NgramContext ngramContext, final ProximityInfo proximityInfo,
             final SettingsValuesForSuggestion settingsValuesForSuggestion,
             final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled,
             final int sequenceNumber, final OnGetSuggestedWordsCallback callback) {
-        final String typedWord = wordComposer.getTypedWord();
-        final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(typedWord);
+        final String typedWordString = wordComposer.getTypedWord();
+        final int trailingSingleQuotesCount =
+                StringUtils.getTrailingSingleQuotesCount(typedWordString);
         final String consideredWord = trailingSingleQuotesCount > 0
-                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
-                : typedWord;
+                ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
+                : typedWordString;
 
         final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
-                wordComposer, prevWordsInfo, proximityInfo, settingsValuesForSuggestion,
-                SESSION_ID_TYPING);
+                wordComposer, ngramContext, proximityInfo.getNativeProximityInfo(),
+                settingsValuesForSuggestion, SESSION_ID_TYPING);
+        final Locale mostProbableLocale = mDictionaryFacilitator.getMostProbableLocale();
         final ArrayList<SuggestedWordInfo> suggestionsContainer =
                 getTransformedSuggestedWordInfoList(wordComposer, suggestionResults,
-                        trailingSingleQuotesCount);
-        final boolean didRemoveTypedWord =
-                SuggestedWordInfo.removeDups(wordComposer.getTypedWord(), suggestionsContainer);
+                        trailingSingleQuotesCount,
+                        // For transforming suggestions that don't come for any dictionary, we
+                        // use the currently most probable locale as it's our best bet.
+                        mostProbableLocale);
+
+        boolean typedWordExistsInAnotherLanguage = false;
+        int qualityOfFoundSourceDictionary = QUALITY_NO_MATCH;
+        @Nullable Dictionary sourceDictionaryOfRemovedWord = null;
+        for (final SuggestedWordInfo info : suggestionsContainer) {
+            // Search for the best dictionary, defined as the first one with the highest match
+            // quality we can find.
+            if (typedWordString.equals(info.mWord)) {
+                if (mostProbableLocale.equals(info.mSourceDict.mLocale)) {
+                    if (qualityOfFoundSourceDictionary < QUALITY_MATCH_PREFERRED_LOCALE) {
+                        // Use this source if the old match had lower quality than this match
+                        sourceDictionaryOfRemovedWord = info.mSourceDict;
+                        qualityOfFoundSourceDictionary = QUALITY_MATCH_PREFERRED_LOCALE;
+                    }
+                } else {
+                    final int matchQuality = (null == info.mSourceDict.mLocale)
+                            ? QUALITY_MATCH_NULL : QUALITY_MATCH_OTHER_LOCALE;
+                    if (qualityOfFoundSourceDictionary < matchQuality) {
+                        // Use this source if the old match had lower quality than this match
+                        sourceDictionaryOfRemovedWord = info.mSourceDict;
+                        qualityOfFoundSourceDictionary = matchQuality;
+                    }
+                    typedWordExistsInAnotherLanguage = true;
+                }
+            }
+        }
+
+        SuggestedWordInfo.removeDups(typedWordString, suggestionsContainer);
 
-        final String whitelistedWord = getWhitelistedWordOrNull(suggestionsContainer);
+        final SuggestedWordInfo whitelistedWordInfo =
+                getWhitelistedWordInfoOrNull(suggestionsContainer);
+        final String whitelistedWord;
+        if (null != whitelistedWordInfo &&
+                (mDictionaryFacilitator.isConfidentAboutCurrentLanguageBeing(
+                        whitelistedWordInfo.mSourceDict.mLocale)
+                || (!typedWordExistsInAnotherLanguage
+                        && !hasPlausibleCandidateInAnyOtherLanguage(suggestionsContainer,
+                                consideredWord, whitelistedWordInfo)))) {
+            // We'll use the whitelist candidate if we are confident the user is typing in the
+            // language of the dictionary it's coming from, or if there is no plausible candidate
+            // coming from another language.
+            whitelistedWord = whitelistedWordInfo.mWord;
+        } else {
+            // If on the contrary we are not confident in the current language and we have
+            // at least a plausible candidate in any other language, then we don't use this
+            // whitelist candidate.
+            whitelistedWord = null;
+        }
         final boolean resultsArePredictions = !wordComposer.isComposingWord();
 
         // We allow auto-correction if we have a whitelisted word, or if the word had more than
         // one char and was not suggested.
         final boolean allowsToBeAutoCorrected = (null != whitelistedWord)
-                || (consideredWord.length() > 1 && !didRemoveTypedWord);
+                || (consideredWord.length() > 1 && (null == sourceDictionaryOfRemovedWord));
 
         final boolean hasAutoCorrection;
-        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
-        // any attempt to do auto-correction is already shielded with a test for this flag; at the
-        // same time, it feels wrong that the SuggestedWord object includes information about
-        // the current settings. It may also be useful to know, when the setting is off, whether
-        // the word *would* have been auto-corrected.
-        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || resultsArePredictions
-                || suggestionResults.isEmpty() || wordComposer.hasDigits()
-                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
-                || !mDictionaryFacilitator.hasInitializedMainDictionary()
+        // If correction is not enabled, we never auto-correct. This is for example for when
+        // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
+        if (!isCorrectionEnabled
+                // If the word does not allow to be auto-corrected, then we don't auto-correct.
+                || !allowsToBeAutoCorrected
+                // If we are doing prediction, then we never auto-correct of course
+                || resultsArePredictions
+                // If we don't have suggestion results, we can't evaluate the first suggestion
+                // for auto-correction
+                || suggestionResults.isEmpty()
+                // If the word has digits, we never auto-correct because it's likely the word
+                // was type with a lot of care
+                || wordComposer.hasDigits()
+                // If the word is mostly caps, we never auto-correct because this is almost
+                // certainly intentional (and careful input)
+                || wordComposer.isMostlyCaps()
+                // We never auto-correct when suggestions are resumed because it would be unexpected
+                || wordComposer.isResumed()
+                // If we don't have a main dictionary, we never want to auto-correct. The reason
+                // for this is, the user may have a contact whose name happens to match a valid
+                // word in their language, and it will unexpectedly auto-correct. For example, if
+                // the user types in English with no dictionary and has a "Will" in their contact
+                // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
+                // main dict => no auto-correct. Also, it would probably get obnoxious quickly.
+                // TODO: now that we have personalization, we may want to re-evaluate this decision
+                || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
+                // If the first suggestion is a shortcut we never auto-correct to it, regardless
+                // of how strong it is (whitelist entries are not KIND_SHORTCUT but KIND_WHITELIST).
+                // TODO: we may want to have shortcut-only entries auto-correct in the future.
                 || suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
-            // If we don't have a main dictionary, we never want to auto-correct. The reason for
-            // this is, the user may have a contact whose name happens to match a valid word in
-            // their language, and it will unexpectedly auto-correct. For example, if the user
-            // types in English with no dictionary and has a "Will" in their contact list, "will"
-            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
-            // auto-correct.
-            // Also, shortcuts should never auto-correct unless they are whitelist entries.
-            // TODO: we may want to have shortcut-only entries auto-correct in the future.
             hasAutoCorrection = false;
         } else {
-            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
-                    suggestionResults.first(), consideredWord, mAutoCorrectionThreshold);
+            final SuggestedWordInfo firstSuggestion = suggestionResults.first();
+            if (!AutoCorrectionUtils.suggestionExceedsThreshold(
+                    firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
+                // Score is too low for autocorrect
+                hasAutoCorrection = false;
+            } else {
+                // We have a high score, so we need to check if this suggestion is in the correct
+                // form to allow auto-correcting to it in this language. For details of how this
+                // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
+                // TODO: this should not have its own logic here but be handled by the dictionary.
+                hasAutoCorrection = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
+            }
         }
 
-        if (!TextUtils.isEmpty(typedWord)) {
-            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
-                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
-                    Dictionary.DICTIONARY_USER_TYPED,
-                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
-                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
+        final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
+                SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
+                null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED
+                        : sourceDictionaryOfRemovedWord,
+                SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
+                SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
+        if (!TextUtils.isEmpty(typedWordString)) {
+            suggestionsContainer.add(0, typedWordInfo);
         }
 
         final ArrayList<SuggestedWordInfo> suggestionsList;
         if (DBG && !suggestionsContainer.isEmpty()) {
-            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
+            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString,
+                    suggestionsContainer);
         } else {
             suggestionsList = suggestionsContainer;
         }
@@ -195,7 +310,7 @@ public final class Suggest {
             inputStyle = inputStyleIfNotPrediction;
         }
         callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
-                suggestionResults.mRawSuggestions,
+                suggestionResults.mRawSuggestions, typedWordInfo,
                 // TODO: this first argument is lying. If this is a whitelisted word which is an
                 // actual word, it says typedWordValid = false, which looks wrong. We should either
                 // rename the attribute or change the value.
@@ -204,16 +319,32 @@ public final class Suggest {
                 false /* isObsoleteSuggestions */, inputStyle, sequenceNumber));
     }
 
+    private boolean hasPlausibleCandidateInAnyOtherLanguage(
+            final ArrayList<SuggestedWordInfo> suggestionsContainer, final String consideredWord,
+            final SuggestedWordInfo whitelistedWordInfo) {
+        for (final SuggestedWordInfo info : suggestionsContainer) {
+            if (whitelistedWordInfo.mSourceDict.mLocale.equals(info.mSourceDict.mLocale)) {
+                continue;
+            }
+            return AutoCorrectionUtils.suggestionExceedsThreshold(info, consideredWord,
+                    mPlausibilityThreshold);
+        }
+        // No candidate in another language
+        return false;
+    }
+
     // Retrieves suggestions for the batch input
     // and calls the callback function with the suggestions.
     private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
-            final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
+            final NgramContext ngramContext, final ProximityInfo proximityInfo,
             final SettingsValuesForSuggestion settingsValuesForSuggestion,
             final int inputStyle, final int sequenceNumber,
             final OnGetSuggestedWordsCallback callback) {
         final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
-                wordComposer, prevWordsInfo, proximityInfo, settingsValuesForSuggestion,
-                SESSION_ID_GESTURE);
+                wordComposer, ngramContext, proximityInfo.getNativeProximityInfo(),
+                settingsValuesForSuggestion, SESSION_ID_GESTURE);
+        // For transforming words that don't come from a dictionary, because it's our best bet
+        final Locale defaultLocale = mDictionaryFacilitator.getMostProbableLocale();
         final ArrayList<SuggestedWordInfo> suggestionsContainer =
                 new ArrayList<>(suggestionResults);
         final int suggestionsCount = suggestionsContainer.size();
@@ -222,9 +353,10 @@ public final class Suggest {
         if (isFirstCharCapitalized || isAllUpperCase) {
             for (int i = 0; i < suggestionsCount; ++i) {
                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
+                final Locale wordlocale = wordInfo.mSourceDict.mLocale;
                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
-                        wordInfo, suggestionResults.mLocale, isAllUpperCase, isFirstCharCapitalized,
-                        0 /* trailingSingleQuotesCount */);
+                        wordInfo, null == wordlocale ? defaultLocale : wordlocale, isAllUpperCase,
+                        isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */);
                 suggestionsContainer.set(i, transformedWordInfo);
             }
         }
@@ -237,7 +369,7 @@ public final class Suggest {
         SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer);
 
         // For some reason some suggestions with MIN_VALUE are making their way here.
-        // TODO: Find a more robust way to detect distractors.
+        // TODO: Find a more robust way to detect distracters.
         for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
             if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
                 suggestionsContainer.remove(i);
@@ -248,8 +380,12 @@ public final class Suggest {
         // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
         // Note that because this method is never used to get predictions, there is no need to
         // modify inputType such in getSuggestedWordsForNonBatchInput.
+        final SuggestedWordInfo pseudoTypedWordInfo = suggestionsContainer.isEmpty() ? null
+                : suggestionsContainer.get(0);
+
         callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
                 suggestionResults.mRawSuggestions,
+                pseudoTypedWordInfo,
                 true /* typedWordValid */,
                 false /* willAutoCorrect */,
                 false /* isObsoleteSuggestions */,
@@ -283,6 +419,41 @@ public final class Suggest {
         return suggestionsList;
     }
 
+    /**
+     * Computes whether this suggestion should be blocked or not in this language
+     *
+     * This function implements a filter that avoids auto-correcting to suggestions that contain
+     * spaces that are above a certain language-dependent character limit. In languages like German
+     * where it's possible to concatenate many words, it often happens our dictionary does not
+     * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain
+     * one or several spaces. Ideally we should understand what the user wants and display useful
+     * suggestions by improving the dictionary and possibly having some specific logic. Until
+     * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell
+     * whether a suggestion is useful or not. So at least for the time being we block
+     * auto-correction when the suggestion is long and contains a space, which should avoid the
+     * worst damage.
+     * This function is implementing that filter. If the language enforces no such limit, then it
+     * always returns true. If the suggestion contains no space, it also returns true. Otherwise,
+     * it checks the length against the language-specific limit.
+     *
+     * @param info the suggestion info
+     * @return whether it's fine to auto-correct to this.
+     */
+    private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) {
+        final Locale locale = info.mSourceDict.mLocale;
+        if (null == locale) {
+            return true;
+        }
+        final Integer maximumLengthForThisLanguage =
+                sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage());
+        if (null == maximumLengthForThisLanguage) {
+            // This language does not enforce a maximum length to auto-correction
+            return true;
+        }
+        return info.mWord.length() <= maximumLengthForThisLanguage
+                || -1 == info.mWord.indexOf(Constants.CODE_SPACE);
+    }
+
     /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
             final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
             final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) {