diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils')
4 files changed, 116 insertions, 14 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index f2a1e524d..55cbf79b3 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -17,21 +17,35 @@ package com.android.inputmethod.latin.utils; import com.android.inputmethod.keyboard.Keyboard; +import com.android.inputmethod.keyboard.KeyboardSwitcher; +import com.android.inputmethod.keyboard.MainKeyboardView; +import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Suggest; +import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback; +import com.android.inputmethod.latin.SuggestedWords; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.WordComposer; /** - * This class is used to prevent distracters/misspellings being added to personalization + * This class is used to prevent distracters being added to personalization * or user history dictionaries */ public class DistracterFilter { private final Suggest mSuggest; private final Keyboard mKeyboard; + // If the score of the top suggestion exceeds this value, the tested word (e.g., + // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to + // words in dictionary. The greater the threshold is, the less likely the tested word would + // become a distracter, which means the tested word will be more likely to be added to + // the dictionary. + private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f; + /** * Create a DistracterFilter instance. * * @param suggest an instance of Suggest which will be used to obtain a list of suggestions - * for a potential distracter/misspelling + * for a potential distracter * @param keyboard the keyboard that is currently being used. This information is needed * when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions. */ @@ -40,9 +54,79 @@ public class DistracterFilter { mKeyboard = keyboard; } - public boolean isDistractorToWordsInDictionaries(final String prevWord, - final String targetWord) { - // TODO: to be implemented + public static DistracterFilter createDistracterFilter(final Suggest suggest, + final KeyboardSwitcher keyboardSwitcher) { + final MainKeyboardView mainKeyboardView = keyboardSwitcher.getMainKeyboardView(); + // TODO: Create Keyboard when mainKeyboardView is null. + // TODO: Figure out the most reasonable keyboard for the filter. Refer to the + // spellchecker's logic. + final Keyboard keyboard = (mainKeyboardView != null) ? + mainKeyboardView.getKeyboard() : null; + final DistracterFilter distracterFilter = new DistracterFilter(suggest, keyboard); + return distracterFilter; + } + + private static boolean suggestionExceedsDistracterThreshold( + final SuggestedWordInfo suggestion, final String consideredWord, + final float distracterThreshold) { + if (null != suggestion) { + final int suggestionScore = suggestion.mScore; + final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( + consideredWord, suggestion.mWord, suggestionScore); + if (normalizedScore > distracterThreshold) { + return true; + } + } return false; } + + /** + * Determine whether a word is a distracter to words in dictionaries. + * + * @param prevWord the previous word, or null if none. + * @param testedWord the word that will be tested to see whether it is a distracter to words + * in dictionaries. + * @return true if testedWord is a distracter, otherwise false. + */ + public boolean isDistracterToWordsInDictionaries(final String prevWord, + final String testedWord) { + if (mSuggest == null) { + return false; + } + + final WordComposer composer = new WordComposer(); + final int[] codePoints = StringUtils.toCodePointArray(testedWord); + final int[] coordinates; + if (null == mKeyboard) { + coordinates = CoordinateUtils.newCoordinateArray(codePoints.length, + Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE); + } else { + coordinates = mKeyboard.getCoordinates(codePoints); + } + composer.setComposingWord(codePoints, coordinates, prevWord); + + final int trailingSingleQuotesCount = composer.trailingSingleQuotesCount(); + final String consideredWord = trailingSingleQuotesCount > 0 ? testedWord.substring(0, + testedWord.length() - trailingSingleQuotesCount) : testedWord; + final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>(); + final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() { + @Override + public void onGetSuggestedWords(final SuggestedWords suggestedWords) { + if (suggestedWords != null && suggestedWords.size() > 1) { + // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from + // the decoder is at index 1. + final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1); + final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold( + firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); + holder.set(hasStrongDistractor); + } + } + }; + mSuggest.getSuggestedWords(composer, prevWord, mKeyboard.getProximityInfo(), + true /* blockOffensiveWords */, true /* isCorrectionEnbaled */, + null /* additionalFeaturesOptions */, 0 /* sessionId */, + SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback); + + return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index 5ce977d5e..74e7db901 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -80,7 +80,8 @@ public final class LanguageModelParam { public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( final ArrayList<String> tokens, final int timestamp, final DictionaryFacilitatorForSuggest dictionaryFacilitator, - final SpacingAndPunctuations spacingAndPunctuations) { + final SpacingAndPunctuations spacingAndPunctuations, + final DistracterFilter distracterFilter) { final ArrayList<LanguageModelParam> languageModelParams = CollectionUtils.newArrayList(); final int N = tokens.size(); @@ -109,7 +110,8 @@ public final class LanguageModelParam { } final LanguageModelParam languageModelParam = detectWhetherVaildWordOrNotAndGetLanguageModelParam( - prevWord, tempWord, timestamp, dictionaryFacilitator); + prevWord, tempWord, timestamp, dictionaryFacilitator, + distracterFilter); if (languageModelParam == null) { continue; } @@ -121,27 +123,36 @@ public final class LanguageModelParam { private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( final String prevWord, final String targetWord, final int timestamp, - final DictionaryFacilitatorForSuggest dictionaryFacilitator) { + final DictionaryFacilitatorForSuggest dictionaryFacilitator, + final DistracterFilter distracterFilter) { final Locale locale = dictionaryFacilitator.getLocale(); if (locale == null) { return null; } - if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) { - // OOV word. - return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, - false /* isValidWord */, locale); - } + // TODO: Though targetWord is an IV (in-vocabulary) word, we should still apply + // distracterFilter in the following code. If targetWord is a distracter, + // it should be filtered out. if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, true /* isValidWord */, locale); } + final String lowerCaseTargetWord = targetWord.toLowerCase(locale); if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { // Add the lower-cased word. return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord, timestamp, true /* isValidWord */, locale); } - // Treat the word as an OOV word. + + // Treat the word as an OOV word. The following statement checks whether this OOV + // is a distracter to words in dictionaries. Being a distracter means the OOV word is + // too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not"). + // Adding such a word to dictonaries would interfere with entering in-dictionary words. For + // example, adding "mot" to dictionaries might interfere with entering "not". + // This kind of OOV should be filtered out. + if (distracterFilter.isDistracterToWordsInDictionaries(prevWord, targetWord)) { + return null; + } return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, false /* isValidWord */, locale); } diff --git a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java index a23b3ac79..bf38abc95 100644 --- a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java +++ b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java @@ -16,6 +16,8 @@ package com.android.inputmethod.latin.utils; +import com.android.inputmethod.annotations.UsedForTesting; + import java.util.Queue; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentLinkedQueue; @@ -74,6 +76,7 @@ public class PrioritizedSerialExecutor { * Enqueues the given task into the prioritized task queue. * @param r the enqueued task */ + @UsedForTesting public void executePrioritized(final Runnable r) { synchronized(mLock) { if (!mIsShutdown) { diff --git a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java index b37779bdc..938d27122 100644 --- a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java @@ -324,4 +324,8 @@ public final class SubtypeLocaleUtils { public static boolean isRtlLanguage(final InputMethodSubtype subtype) { return isRtlLanguage(getSubtypeLocale(subtype)); } + + public static String getCombiningRulesExtraValue(final InputMethodSubtype subtype) { + return subtype.getExtraValueOf(Constants.Subtype.ExtraValue.COMBINING_RULES); + } } |