diff options
author | 2014-05-13 23:08:40 +0000 | |
---|---|---|
committer | 2014-05-13 23:08:40 +0000 | |
commit | 25ec32f635998ac1ed4b1a0af39e6f48d8e1aee7 (patch) | |
tree | 4a9e78fd303d6adbf8e08a25f1c9a2c873a96f46 /java/src | |
parent | f8297dc54d0e7b55ca62aa97d1799b2dc014b0f3 (diff) | |
parent | ac20253806180ad302e6bdea681d41bc74ba0722 (diff) | |
download | latinime-25ec32f635998ac1ed4b1a0af39e6f48d8e1aee7.tar.gz latinime-25ec32f635998ac1ed4b1a0af39e6f48d8e1aee7.tar.xz latinime-25ec32f635998ac1ed4b1a0af39e6f48d8e1aee7.zip |
Merge "Implement the distracter filter"
Diffstat (limited to 'java/src')
3 files changed, 97 insertions, 17 deletions
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 7dc566a14..8a2ed1088 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -540,18 +540,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen refreshPersonalizationDictionarySession(); } - private DistracterFilter createDistracterFilter() { - final MainKeyboardView mainKeyboardView = mKeyboardSwitcher.getMainKeyboardView(); - // TODO: Create Keyboard when mainKeyboardView is null. - // TODO: Figure out the most reasonable keyboard for the filter. Refer to the - // spellchecker's logic. - final Keyboard keyboard = (mainKeyboardView != null) ? - mainKeyboardView.getKeyboard() : null; - final DistracterFilter distracterFilter = new DistracterFilter(mInputLogic.mSuggest, - keyboard); - return distracterFilter; - } - private void refreshPersonalizationDictionarySession() { final DictionaryFacilitatorForSuggest dictionaryFacilitator = mInputLogic.mSuggest.mDictionaryFacilitator; @@ -1755,6 +1743,11 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mInputLogic.mSuggest.mDictionaryFacilitator.clearPersonalizationDictionary(); } + @UsedForTesting + /* package for test */ DistracterFilter createDistracterFilter() { + return DistracterFilter.createDistracterFilter(mInputLogic.mSuggest, mKeyboardSwitcher); + } + public void dumpDictionaryForDebug(final String dictName) { final DictionaryFacilitatorForSuggest dictionaryFacilitator = mInputLogic.mSuggest.mDictionaryFacilitator; diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index 48e43d6ef..55cbf79b3 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -17,21 +17,35 @@ package com.android.inputmethod.latin.utils; import com.android.inputmethod.keyboard.Keyboard; +import com.android.inputmethod.keyboard.KeyboardSwitcher; +import com.android.inputmethod.keyboard.MainKeyboardView; +import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Suggest; +import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback; +import com.android.inputmethod.latin.SuggestedWords; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.WordComposer; /** - * This class is used to prevent distracters/misspellings being added to personalization + * This class is used to prevent distracters being added to personalization * or user history dictionaries */ public class DistracterFilter { private final Suggest mSuggest; private final Keyboard mKeyboard; + // If the score of the top suggestion exceeds this value, the tested word (e.g., + // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to + // words in dictionary. The greater the threshold is, the less likely the tested word would + // become a distracter, which means the tested word will be more likely to be added to + // the dictionary. + private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f; + /** * Create a DistracterFilter instance. * * @param suggest an instance of Suggest which will be used to obtain a list of suggestions - * for a potential distracter/misspelling + * for a potential distracter * @param keyboard the keyboard that is currently being used. This information is needed * when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions. */ @@ -40,9 +54,79 @@ public class DistracterFilter { mKeyboard = keyboard; } - public boolean isDistracterToWordsInDictionaries(final String prevWord, - final String targetWord) { - // TODO: to be implemented + public static DistracterFilter createDistracterFilter(final Suggest suggest, + final KeyboardSwitcher keyboardSwitcher) { + final MainKeyboardView mainKeyboardView = keyboardSwitcher.getMainKeyboardView(); + // TODO: Create Keyboard when mainKeyboardView is null. + // TODO: Figure out the most reasonable keyboard for the filter. Refer to the + // spellchecker's logic. + final Keyboard keyboard = (mainKeyboardView != null) ? + mainKeyboardView.getKeyboard() : null; + final DistracterFilter distracterFilter = new DistracterFilter(suggest, keyboard); + return distracterFilter; + } + + private static boolean suggestionExceedsDistracterThreshold( + final SuggestedWordInfo suggestion, final String consideredWord, + final float distracterThreshold) { + if (null != suggestion) { + final int suggestionScore = suggestion.mScore; + final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( + consideredWord, suggestion.mWord, suggestionScore); + if (normalizedScore > distracterThreshold) { + return true; + } + } return false; } + + /** + * Determine whether a word is a distracter to words in dictionaries. + * + * @param prevWord the previous word, or null if none. + * @param testedWord the word that will be tested to see whether it is a distracter to words + * in dictionaries. + * @return true if testedWord is a distracter, otherwise false. + */ + public boolean isDistracterToWordsInDictionaries(final String prevWord, + final String testedWord) { + if (mSuggest == null) { + return false; + } + + final WordComposer composer = new WordComposer(); + final int[] codePoints = StringUtils.toCodePointArray(testedWord); + final int[] coordinates; + if (null == mKeyboard) { + coordinates = CoordinateUtils.newCoordinateArray(codePoints.length, + Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE); + } else { + coordinates = mKeyboard.getCoordinates(codePoints); + } + composer.setComposingWord(codePoints, coordinates, prevWord); + + final int trailingSingleQuotesCount = composer.trailingSingleQuotesCount(); + final String consideredWord = trailingSingleQuotesCount > 0 ? testedWord.substring(0, + testedWord.length() - trailingSingleQuotesCount) : testedWord; + final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>(); + final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() { + @Override + public void onGetSuggestedWords(final SuggestedWords suggestedWords) { + if (suggestedWords != null && suggestedWords.size() > 1) { + // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from + // the decoder is at index 1. + final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1); + final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold( + firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); + holder.set(hasStrongDistractor); + } + } + }; + mSuggest.getSuggestedWords(composer, prevWord, mKeyboard.getProximityInfo(), + true /* blockOffensiveWords */, true /* isCorrectionEnbaled */, + null /* additionalFeaturesOptions */, 0 /* sessionId */, + SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback); + + return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index 55061f45f..74e7db901 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -129,6 +129,9 @@ public final class LanguageModelParam { if (locale == null) { return null; } + // TODO: Though targetWord is an IV (in-vocabulary) word, we should still apply + // distracterFilter in the following code. If targetWord is a distracter, + // it should be filtered out. if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, true /* isValidWord */, locale); |