aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/Suggest.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/Suggest.java')
-rw-r--r--java/src/com/android/inputmethod/latin/Suggest.java287
1 files changed, 229 insertions, 58 deletions
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index b03818c1d..0bf0f687a 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -20,16 +20,21 @@ import android.text.TextUtils;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.common.Constants;
+import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DebugFlags;
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
-import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.SuggestionResults;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Locale;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
/**
* This class loads a dictionary and provides a list of suggestions for a given sequence of
* characters. This includes corrections and completions.
@@ -49,34 +54,55 @@ public final class Suggest {
private static final boolean DBG = DebugFlags.DEBUG_ENABLED;
private final DictionaryFacilitator mDictionaryFacilitator;
+ private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12;
+ private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength =
+ new HashMap<>();
+ static {
+ // TODO: should we add Finnish here?
+ // TODO: This should not be hardcoded here but be written in the dictionary header
+ sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(),
+ MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN);
+ }
+
private float mAutoCorrectionThreshold;
+ private float mPlausibilityThreshold;
public Suggest(final DictionaryFacilitator dictionaryFacilitator) {
mDictionaryFacilitator = dictionaryFacilitator;
}
- public Locale getLocale() {
- return mDictionaryFacilitator.getLocale();
- }
-
+ /**
+ * Set the normalized-score threshold for a suggestion to be considered strong enough that we
+ * will auto-correct to this.
+ * @param threshold the threshold
+ */
public void setAutoCorrectionThreshold(final float threshold) {
mAutoCorrectionThreshold = threshold;
}
+ /**
+ * Set the normalized-score threshold for what we consider a "plausible" suggestion, in
+ * the same dimension as the auto-correction threshold.
+ * @param threshold the threshold
+ */
+ public void setPlausibilityThreshold(final float threshold) {
+ mPlausibilityThreshold = threshold;
+ }
+
public interface OnGetSuggestedWordsCallback {
public void onGetSuggestedWords(final SuggestedWords suggestedWords);
}
public void getSuggestedWords(final WordComposer wordComposer,
- final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
+ final NgramContext ngramContext, final ProximityInfo proximityInfo,
final SettingsValuesForSuggestion settingsValuesForSuggestion,
final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber,
final OnGetSuggestedWordsCallback callback) {
if (wordComposer.isBatchMode()) {
- getSuggestedWordsForBatchInput(wordComposer, prevWordsInfo, proximityInfo,
+ getSuggestedWordsForBatchInput(wordComposer, ngramContext, proximityInfo,
settingsValuesForSuggestion, inputStyle, sequenceNumber, callback);
} else {
- getSuggestedWordsForNonBatchInput(wordComposer, prevWordsInfo, proximityInfo,
+ getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, proximityInfo,
settingsValuesForSuggestion, inputStyle, isCorrectionEnabled,
sequenceNumber, callback);
}
@@ -84,7 +110,7 @@ public final class Suggest {
private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList(
final WordComposer wordComposer, final SuggestionResults results,
- final int trailingSingleQuotesCount) {
+ final int trailingSingleQuotesCount, final Locale defaultLocale) {
final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase()
&& !wordComposer.isResumed();
final boolean isOnlyFirstCharCapitalized =
@@ -96,16 +122,19 @@ public final class Suggest {
|| 0 != trailingSingleQuotesCount) {
for (int i = 0; i < suggestionsCount; ++i) {
final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
+ final Locale wordLocale = wordInfo.mSourceDict.mLocale;
final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
- wordInfo, results.mLocale, shouldMakeSuggestionsAllUpperCase,
- isOnlyFirstCharCapitalized, trailingSingleQuotesCount);
+ wordInfo, null == wordLocale ? defaultLocale : wordLocale,
+ shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized,
+ trailingSingleQuotesCount);
suggestionsContainer.set(i, transformedWordInfo);
}
}
return suggestionsContainer;
}
- private static String getWhitelistedWordOrNull(final ArrayList<SuggestedWordInfo> suggestions) {
+ private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
+ @Nonnull final ArrayList<SuggestedWordInfo> suggestions) {
if (suggestions.isEmpty()) {
return null;
}
@@ -113,75 +142,161 @@ public final class Suggest {
if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
return null;
}
- return firstSuggestedWordInfo.mWord;
+ return firstSuggestedWordInfo;
}
+ // Quality constants for dictionary match
+ // In increasing order of quality
+ // This source dictionary does not match the typed word.
+ private static final int QUALITY_NO_MATCH = 0;
+ // This source dictionary has a null locale, and the preferred locale is also null.
+ private static final int QUALITY_MATCH_NULL = 1;
+ // This source dictionary has a non-null locale different from the preferred locale. The
+ // preferred locale may be null : this is still better than MATCH_NULL.
+ private static final int QUALITY_MATCH_OTHER_LOCALE = 2;
+ // This source dictionary matches the preferred locale.
+ private static final int QUALITY_MATCH_PREFERRED_LOCALE = 3;
+
// Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
// and calls the callback function with the suggestions.
private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer,
- final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
+ final NgramContext ngramContext, final ProximityInfo proximityInfo,
final SettingsValuesForSuggestion settingsValuesForSuggestion,
final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled,
final int sequenceNumber, final OnGetSuggestedWordsCallback callback) {
- final String typedWord = wordComposer.getTypedWord();
- final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(typedWord);
+ final String typedWordString = wordComposer.getTypedWord();
+ final int trailingSingleQuotesCount =
+ StringUtils.getTrailingSingleQuotesCount(typedWordString);
final String consideredWord = trailingSingleQuotesCount > 0
- ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
- : typedWord;
+ ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
+ : typedWordString;
final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
- wordComposer, prevWordsInfo, proximityInfo, settingsValuesForSuggestion,
- SESSION_ID_TYPING);
+ wordComposer, ngramContext, proximityInfo.getNativeProximityInfo(),
+ settingsValuesForSuggestion, SESSION_ID_TYPING);
+ final Locale mostProbableLocale = mDictionaryFacilitator.getMostProbableLocale();
final ArrayList<SuggestedWordInfo> suggestionsContainer =
getTransformedSuggestedWordInfoList(wordComposer, suggestionResults,
- trailingSingleQuotesCount);
- final boolean didRemoveTypedWord =
- SuggestedWordInfo.removeDups(wordComposer.getTypedWord(), suggestionsContainer);
+ trailingSingleQuotesCount,
+ // For transforming suggestions that don't come for any dictionary, we
+ // use the currently most probable locale as it's our best bet.
+ mostProbableLocale);
+
+ boolean typedWordExistsInAnotherLanguage = false;
+ int qualityOfFoundSourceDictionary = QUALITY_NO_MATCH;
+ @Nullable Dictionary sourceDictionaryOfRemovedWord = null;
+ for (final SuggestedWordInfo info : suggestionsContainer) {
+ // Search for the best dictionary, defined as the first one with the highest match
+ // quality we can find.
+ if (typedWordString.equals(info.mWord)) {
+ if (mostProbableLocale.equals(info.mSourceDict.mLocale)) {
+ if (qualityOfFoundSourceDictionary < QUALITY_MATCH_PREFERRED_LOCALE) {
+ // Use this source if the old match had lower quality than this match
+ sourceDictionaryOfRemovedWord = info.mSourceDict;
+ qualityOfFoundSourceDictionary = QUALITY_MATCH_PREFERRED_LOCALE;
+ }
+ } else {
+ final int matchQuality = (null == info.mSourceDict.mLocale)
+ ? QUALITY_MATCH_NULL : QUALITY_MATCH_OTHER_LOCALE;
+ if (qualityOfFoundSourceDictionary < matchQuality) {
+ // Use this source if the old match had lower quality than this match
+ sourceDictionaryOfRemovedWord = info.mSourceDict;
+ qualityOfFoundSourceDictionary = matchQuality;
+ }
+ typedWordExistsInAnotherLanguage = true;
+ }
+ }
+ }
+
+ SuggestedWordInfo.removeDups(typedWordString, suggestionsContainer);
- final String whitelistedWord = getWhitelistedWordOrNull(suggestionsContainer);
+ final SuggestedWordInfo whitelistedWordInfo =
+ getWhitelistedWordInfoOrNull(suggestionsContainer);
+ final String whitelistedWord;
+ if (null != whitelistedWordInfo &&
+ (mDictionaryFacilitator.isConfidentAboutCurrentLanguageBeing(
+ whitelistedWordInfo.mSourceDict.mLocale)
+ || (!typedWordExistsInAnotherLanguage
+ && !hasPlausibleCandidateInAnyOtherLanguage(suggestionsContainer,
+ consideredWord, whitelistedWordInfo)))) {
+ // We'll use the whitelist candidate if we are confident the user is typing in the
+ // language of the dictionary it's coming from, or if there is no plausible candidate
+ // coming from another language.
+ whitelistedWord = whitelistedWordInfo.mWord;
+ } else {
+ // If on the contrary we are not confident in the current language and we have
+ // at least a plausible candidate in any other language, then we don't use this
+ // whitelist candidate.
+ whitelistedWord = null;
+ }
final boolean resultsArePredictions = !wordComposer.isComposingWord();
// We allow auto-correction if we have a whitelisted word, or if the word had more than
// one char and was not suggested.
final boolean allowsToBeAutoCorrected = (null != whitelistedWord)
- || (consideredWord.length() > 1 && !didRemoveTypedWord);
+ || (consideredWord.length() > 1 && (null == sourceDictionaryOfRemovedWord));
final boolean hasAutoCorrection;
- // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
- // any attempt to do auto-correction is already shielded with a test for this flag; at the
- // same time, it feels wrong that the SuggestedWord object includes information about
- // the current settings. It may also be useful to know, when the setting is off, whether
- // the word *would* have been auto-corrected.
- if (!isCorrectionEnabled || !allowsToBeAutoCorrected || resultsArePredictions
- || suggestionResults.isEmpty() || wordComposer.hasDigits()
- || wordComposer.isMostlyCaps() || wordComposer.isResumed()
- || !mDictionaryFacilitator.hasInitializedMainDictionary()
+ // If correction is not enabled, we never auto-correct. This is for example for when
+ // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
+ if (!isCorrectionEnabled
+ // If the word does not allow to be auto-corrected, then we don't auto-correct.
+ || !allowsToBeAutoCorrected
+ // If we are doing prediction, then we never auto-correct of course
+ || resultsArePredictions
+ // If we don't have suggestion results, we can't evaluate the first suggestion
+ // for auto-correction
+ || suggestionResults.isEmpty()
+ // If the word has digits, we never auto-correct because it's likely the word
+ // was type with a lot of care
+ || wordComposer.hasDigits()
+ // If the word is mostly caps, we never auto-correct because this is almost
+ // certainly intentional (and careful input)
+ || wordComposer.isMostlyCaps()
+ // We never auto-correct when suggestions are resumed because it would be unexpected
+ || wordComposer.isResumed()
+ // If we don't have a main dictionary, we never want to auto-correct. The reason
+ // for this is, the user may have a contact whose name happens to match a valid
+ // word in their language, and it will unexpectedly auto-correct. For example, if
+ // the user types in English with no dictionary and has a "Will" in their contact
+ // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
+ // main dict => no auto-correct. Also, it would probably get obnoxious quickly.
+ // TODO: now that we have personalization, we may want to re-evaluate this decision
+ || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
+ // If the first suggestion is a shortcut we never auto-correct to it, regardless
+ // of how strong it is (whitelist entries are not KIND_SHORTCUT but KIND_WHITELIST).
+ // TODO: we may want to have shortcut-only entries auto-correct in the future.
|| suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
- // If we don't have a main dictionary, we never want to auto-correct. The reason for
- // this is, the user may have a contact whose name happens to match a valid word in
- // their language, and it will unexpectedly auto-correct. For example, if the user
- // types in English with no dictionary and has a "Will" in their contact list, "will"
- // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
- // auto-correct.
- // Also, shortcuts should never auto-correct unless they are whitelist entries.
- // TODO: we may want to have shortcut-only entries auto-correct in the future.
hasAutoCorrection = false;
} else {
- hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
- suggestionResults.first(), consideredWord, mAutoCorrectionThreshold);
+ final SuggestedWordInfo firstSuggestion = suggestionResults.first();
+ if (!AutoCorrectionUtils.suggestionExceedsThreshold(
+ firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
+ // Score is too low for autocorrect
+ hasAutoCorrection = false;
+ } else {
+ // We have a high score, so we need to check if this suggestion is in the correct
+ // form to allow auto-correcting to it in this language. For details of how this
+ // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
+ // TODO: this should not have its own logic here but be handled by the dictionary.
+ hasAutoCorrection = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
+ }
}
- if (!TextUtils.isEmpty(typedWord)) {
- suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
- SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
- Dictionary.DICTIONARY_USER_TYPED,
- SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
- SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
+ final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
+ SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
+ null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED
+ : sourceDictionaryOfRemovedWord,
+ SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
+ SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
+ if (!TextUtils.isEmpty(typedWordString)) {
+ suggestionsContainer.add(0, typedWordInfo);
}
final ArrayList<SuggestedWordInfo> suggestionsList;
if (DBG && !suggestionsContainer.isEmpty()) {
- suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
+ suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString,
+ suggestionsContainer);
} else {
suggestionsList = suggestionsContainer;
}
@@ -195,7 +310,7 @@ public final class Suggest {
inputStyle = inputStyleIfNotPrediction;
}
callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
- suggestionResults.mRawSuggestions,
+ suggestionResults.mRawSuggestions, typedWordInfo,
// TODO: this first argument is lying. If this is a whitelisted word which is an
// actual word, it says typedWordValid = false, which looks wrong. We should either
// rename the attribute or change the value.
@@ -204,16 +319,32 @@ public final class Suggest {
false /* isObsoleteSuggestions */, inputStyle, sequenceNumber));
}
+ private boolean hasPlausibleCandidateInAnyOtherLanguage(
+ final ArrayList<SuggestedWordInfo> suggestionsContainer, final String consideredWord,
+ final SuggestedWordInfo whitelistedWordInfo) {
+ for (final SuggestedWordInfo info : suggestionsContainer) {
+ if (whitelistedWordInfo.mSourceDict.mLocale.equals(info.mSourceDict.mLocale)) {
+ continue;
+ }
+ return AutoCorrectionUtils.suggestionExceedsThreshold(info, consideredWord,
+ mPlausibilityThreshold);
+ }
+ // No candidate in another language
+ return false;
+ }
+
// Retrieves suggestions for the batch input
// and calls the callback function with the suggestions.
private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
- final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
+ final NgramContext ngramContext, final ProximityInfo proximityInfo,
final SettingsValuesForSuggestion settingsValuesForSuggestion,
final int inputStyle, final int sequenceNumber,
final OnGetSuggestedWordsCallback callback) {
final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
- wordComposer, prevWordsInfo, proximityInfo, settingsValuesForSuggestion,
- SESSION_ID_GESTURE);
+ wordComposer, ngramContext, proximityInfo.getNativeProximityInfo(),
+ settingsValuesForSuggestion, SESSION_ID_GESTURE);
+ // For transforming words that don't come from a dictionary, because it's our best bet
+ final Locale defaultLocale = mDictionaryFacilitator.getMostProbableLocale();
final ArrayList<SuggestedWordInfo> suggestionsContainer =
new ArrayList<>(suggestionResults);
final int suggestionsCount = suggestionsContainer.size();
@@ -222,9 +353,10 @@ public final class Suggest {
if (isFirstCharCapitalized || isAllUpperCase) {
for (int i = 0; i < suggestionsCount; ++i) {
final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
+ final Locale wordlocale = wordInfo.mSourceDict.mLocale;
final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
- wordInfo, suggestionResults.mLocale, isAllUpperCase, isFirstCharCapitalized,
- 0 /* trailingSingleQuotesCount */);
+ wordInfo, null == wordlocale ? defaultLocale : wordlocale, isAllUpperCase,
+ isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */);
suggestionsContainer.set(i, transformedWordInfo);
}
}
@@ -237,7 +369,7 @@ public final class Suggest {
SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer);
// For some reason some suggestions with MIN_VALUE are making their way here.
- // TODO: Find a more robust way to detect distractors.
+ // TODO: Find a more robust way to detect distracters.
for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
suggestionsContainer.remove(i);
@@ -248,8 +380,12 @@ public final class Suggest {
// (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
// Note that because this method is never used to get predictions, there is no need to
// modify inputType such in getSuggestedWordsForNonBatchInput.
+ final SuggestedWordInfo pseudoTypedWordInfo = suggestionsContainer.isEmpty() ? null
+ : suggestionsContainer.get(0);
+
callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
suggestionResults.mRawSuggestions,
+ pseudoTypedWordInfo,
true /* typedWordValid */,
false /* willAutoCorrect */,
false /* isObsoleteSuggestions */,
@@ -283,6 +419,41 @@ public final class Suggest {
return suggestionsList;
}
+ /**
+ * Computes whether this suggestion should be blocked or not in this language
+ *
+ * This function implements a filter that avoids auto-correcting to suggestions that contain
+ * spaces that are above a certain language-dependent character limit. In languages like German
+ * where it's possible to concatenate many words, it often happens our dictionary does not
+ * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain
+ * one or several spaces. Ideally we should understand what the user wants and display useful
+ * suggestions by improving the dictionary and possibly having some specific logic. Until
+ * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell
+ * whether a suggestion is useful or not. So at least for the time being we block
+ * auto-correction when the suggestion is long and contains a space, which should avoid the
+ * worst damage.
+ * This function is implementing that filter. If the language enforces no such limit, then it
+ * always returns true. If the suggestion contains no space, it also returns true. Otherwise,
+ * it checks the length against the language-specific limit.
+ *
+ * @param info the suggestion info
+ * @return whether it's fine to auto-correct to this.
+ */
+ private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) {
+ final Locale locale = info.mSourceDict.mLocale;
+ if (null == locale) {
+ return true;
+ }
+ final Integer maximumLengthForThisLanguage =
+ sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage());
+ if (null == maximumLengthForThisLanguage) {
+ // This language does not enforce a maximum length to auto-correction
+ return true;
+ }
+ return info.mWord.length() <= maximumLengthForThisLanguage
+ || -1 == info.mWord.indexOf(Constants.CODE_SPACE);
+ }
+
/* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) {