diff options
author | 2015-02-06 15:07:16 -0800 | |
---|---|---|
committer | 2015-02-09 11:52:12 -0800 | |
commit | 644a709a5fec65c3ac1c96f18af397458fac7658 (patch) | |
tree | 33cdcdd15efa93c4b2291665aa1c0175d994ea83 /java/src/com/android/inputmethod/latin/utils | |
parent | aadfef6ffaf4fd4249a92252e401cbd98cf79d54 (diff) | |
download | latinime-644a709a5fec65c3ac1c96f18af397458fac7658.tar.gz latinime-644a709a5fec65c3ac1c96f18af397458fac7658.tar.xz latinime-644a709a5fec65c3ac1c96f18af397458fac7658.zip |
Remove distracter filter from client.
Bug 19296201.
Change-Id: Ic834e5956347cd86a96bd14024c42ad8ee258659
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils')
4 files changed, 5 insertions, 503 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java deleted file mode 100644 index 525212c96..000000000 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import android.view.inputmethod.InputMethodSubtype; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.NgramContext; - -import java.util.List; -import java.util.Locale; - -import javax.annotation.Nonnull; - -public interface DistracterFilter { - /** - * Determine whether a word is a distracter to words in dictionaries. - * - * @param ngramContext the n-gram context - * @param testedWord the word that will be tested to see whether it is a distracter to words - * in dictionaries. - * @param locale the locale of word. - * @return true if testedWord is a distracter, otherwise false. - */ - public boolean isDistracterToWordsInDictionaries(final NgramContext ngramContext, - final String testedWord, final Locale locale); - - @UsedForTesting - public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, - final Locale locale); - - public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes); - - public void close(); - - public static final class HandlingType { - private final static int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0; - private final static int SHOULD_BE_LOWER_CASED = 0x1; - private final static int SHOULD_BE_HANDLED_AS_OOV = 0x2; - - public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) { - int wordHandlingType = HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; - if (shouldBeLowerCased) { - wordHandlingType |= HandlingType.SHOULD_BE_LOWER_CASED; - } - if (isOov) { - wordHandlingType |= HandlingType.SHOULD_BE_HANDLED_AS_OOV; - } - return wordHandlingType; - } - - public static boolean shouldBeLowerCased(final int handlingType) { - return (handlingType & SHOULD_BE_LOWER_CASED) != 0; - } - - public static boolean shouldBeHandledAsOov(final int handlingType) { - return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0; - } - } - - @Nonnull - public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() { - @Override - public boolean isDistracterToWordsInDictionaries(NgramContext ngramContext, - String testedWord, Locale locale) { - return false; - } - - @Override - public int getWordHandlingType(final NgramContext ngramContext, - final String testedWord, final Locale locale) { - return HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; - } - - @Override - public void close() { - } - - @Override - public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) { - } - }; -} diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java deleted file mode 100644 index becf13fd9..000000000 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import android.content.Context; -import android.content.res.Resources; -import android.text.InputType; -import android.util.Log; -import android.util.LruCache; -import android.util.Pair; -import android.view.inputmethod.EditorInfo; -import android.view.inputmethod.InputMethodSubtype; - -import com.android.inputmethod.keyboard.Keyboard; -import com.android.inputmethod.keyboard.KeyboardId; -import com.android.inputmethod.keyboard.KeyboardLayoutSet; -import com.android.inputmethod.latin.DictionaryFacilitator; -import com.android.inputmethod.latin.DictionaryFacilitatorLruCache; -import com.android.inputmethod.latin.NgramContext; -import com.android.inputmethod.latin.RichInputMethodSubtype; -import com.android.inputmethod.latin.SuggestedWords; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.WordComposer; -import com.android.inputmethod.latin.common.StringUtils; -import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion; - -/** - * This class is used to prevent distracters being added to personalization - * or user history dictionaries - */ -public class DistracterFilterCheckingExactMatchesAndSuggestions implements DistracterFilter { - private static final String TAG = - DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName(); - private static final boolean DEBUG = false; - - private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024; - - private final Context mContext; - private final ConcurrentHashMap<Locale, InputMethodSubtype> mLocaleToSubtypeCache; - private final ConcurrentHashMap<Locale, Keyboard> mLocaleToKeyboardCache; - private final DictionaryFacilitatorLruCache mDictionaryFacilitatorLruCache; - // The key is a pair of a locale and a word. The value indicates the word is a distracter to - // words of the locale. - private final LruCache<Pair<Locale, String>, Boolean> mDistractersCache; - private final Object mLock = new Object(); - - // If the score of the top suggestion exceeds this value, the tested word (e.g., - // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to - // words in dictionary. The greater the threshold is, the less likely the tested word would - // become a distracter, which means the tested word will be more likely to be added to - // the dictionary. - private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f; - - /** - * Create a DistracterFilter instance. - * - * @param context the context. - */ - public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) { - mContext = context; - mLocaleToSubtypeCache = new ConcurrentHashMap<>(); - mLocaleToKeyboardCache = new ConcurrentHashMap<>(); - mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache( - context, "" /* dictionaryNamePrefix */); - mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE); - } - - @Override - public void close() { - mLocaleToSubtypeCache.clear(); - mLocaleToKeyboardCache.clear(); - mDictionaryFacilitatorLruCache.evictAll(); - // Don't clear mDistractersCache. - } - - @Override - public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { - final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>(); - if (enabledSubtypes != null) { - for (final InputMethodSubtype subtype : enabledSubtypes) { - final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype); - if (newLocaleToSubtypeMap.containsKey(locale)) { - // Multiple subtypes are enabled for one locale. - // TODO: Investigate what we should do for this case. - continue; - } - newLocaleToSubtypeMap.put(locale, subtype); - } - } - if (mLocaleToSubtypeCache.equals(newLocaleToSubtypeMap)) { - // Enabled subtypes have not been changed. - return; - } - // Update subtype and keyboard map for locales that are in the current mapping. - for (final Locale locale: mLocaleToSubtypeCache.keySet()) { - if (newLocaleToSubtypeMap.containsKey(locale)) { - final InputMethodSubtype newSubtype = newLocaleToSubtypeMap.remove(locale); - if (newSubtype.equals(newLocaleToSubtypeMap.get(locale))) { - // Mapping has not been changed. - continue; - } - mLocaleToSubtypeCache.replace(locale, newSubtype); - } else { - mLocaleToSubtypeCache.remove(locale); - } - mLocaleToKeyboardCache.remove(locale); - } - // Add locales that are not in the current mapping. - mLocaleToSubtypeCache.putAll(newLocaleToSubtypeMap); - } - - private Keyboard getKeyboardForLocale(final Locale locale) { - final Keyboard cachedKeyboard = mLocaleToKeyboardCache.get(locale); - if (cachedKeyboard != null) { - return cachedKeyboard; - } - final InputMethodSubtype subtype = mLocaleToSubtypeCache.get(locale); - if (subtype == null) { - return null; - } - final EditorInfo editorInfo = new EditorInfo(); - editorInfo.inputType = InputType.TYPE_CLASS_TEXT; - final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder( - mContext, editorInfo); - final Resources res = mContext.getResources(); - final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res); - final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res); - builder.setKeyboardGeometry(keyboardWidth, keyboardHeight); - builder.setSubtype(new RichInputMethodSubtype(subtype)); - builder.setIsSpellChecker(false /* isSpellChecker */); - final KeyboardLayoutSet layoutSet = builder.build(); - final Keyboard newKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET); - mLocaleToKeyboardCache.put(locale, newKeyboard); - return newKeyboard; - } - - /** - * Determine whether a word is a distracter to words in dictionaries. - * - * @param ngramContext the n-gram context. Not used for now. - * @param testedWord the word that will be tested to see whether it is a distracter to words - * in dictionaries. - * @param locale the locale of word. - * @return true if testedWord is a distracter, otherwise false. - */ - @Override - public boolean isDistracterToWordsInDictionaries(final NgramContext ngramContext, - final String testedWord, final Locale locale) { - if (locale == null) { - return false; - } - if (!mLocaleToSubtypeCache.containsKey(locale)) { - Log.e(TAG, "Locale " + locale + " is not enabled."); - // TODO: Investigate what we should do for disabled locales. - return false; - } - final DictionaryFacilitator dictionaryFacilitator = - mDictionaryFacilitatorLruCache.get(locale); - if (DEBUG) { - Log.d(TAG, "testedWord: " + testedWord); - } - final Pair<Locale, String> cacheKey = new Pair<>(locale, testedWord); - final Boolean isCachedDistracter = mDistractersCache.get(cacheKey); - if (isCachedDistracter != null && isCachedDistracter) { - if (DEBUG) { - Log.d(TAG, "isDistracter: true (cache hit)"); - } - return true; - } - - final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches = - checkDistracterUsingMaxFreqencyOfExactMatches(dictionaryFacilitator, testedWord); - if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) { - // Add the pair of locale and word to the cache. - mDistractersCache.put(cacheKey, Boolean.TRUE); - return true; - } - if (dictionaryFacilitator.isValidSuggestionWord(testedWord)) { - // Valid word is not a distracter. - if (DEBUG) { - Log.d(TAG, "isDistracter: false (valid word)"); - } - return false; - } - - final Keyboard keyboard = getKeyboardForLocale(locale); - final boolean isDistracterCheckedByGetSuggestion = - checkDistracterUsingGetSuggestions(dictionaryFacilitator, keyboard, testedWord); - if (isDistracterCheckedByGetSuggestion) { - // Add the pair of locale and word to the cache. - mDistractersCache.put(cacheKey, Boolean.TRUE); - return true; - } - return false; - } - - private static boolean checkDistracterUsingMaxFreqencyOfExactMatches( - final DictionaryFacilitator dictionaryFacilitator, final String testedWord) { - // The tested word is a distracter when there is a word that is exact matched to the tested - // word and its probability is higher than the tested word's probability. - final int perfectMatchFreq = dictionaryFacilitator.getFrequency(testedWord); - final int exactMatchFreq = dictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord); - final boolean isDistracter = perfectMatchFreq < exactMatchFreq; - if (DEBUG) { - Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq); - Log.d(TAG, "exactMatchFreq: " + exactMatchFreq); - Log.d(TAG, "isDistracter: " + isDistracter); - } - return isDistracter; - } - - private boolean checkDistracterUsingGetSuggestions( - final DictionaryFacilitator dictionaryFacilitator, final Keyboard keyboard, - final String testedWord) { - if (keyboard == null) { - return false; - } - final SettingsValuesForSuggestion settingsValuesForSuggestion = - new SettingsValuesForSuggestion(false /* blockPotentiallyOffensive */, - false /* spaceAwareGestureEnabled */); - final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord); - final String consideredWord = trailingSingleQuotesCount > 0 ? - testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) : - testedWord; - final WordComposer composer = new WordComposer(); - final int[] codePoints = StringUtils.toCodePointArray(testedWord); - final int[] coordinates = keyboard.getCoordinates(codePoints); - composer.setComposingWord(codePoints, coordinates); - final SuggestionResults suggestionResults; - synchronized (mLock) { - suggestionResults = dictionaryFacilitator.getSuggestionResults(composer, - NgramContext.EMPTY_PREV_WORDS_INFO, - keyboard.getProximityInfo().getNativeProximityInfo(), - settingsValuesForSuggestion, 0 /* sessionId */, - SuggestedWords.INPUT_STYLE_TYPING, - keyboard.getKeyboardLayout()); - } - if (suggestionResults.isEmpty()) { - return false; - } - final SuggestedWordInfo firstSuggestion = suggestionResults.first(); - final boolean isDistracter = suggestionExceedsDistracterThreshold( - firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); - if (DEBUG) { - Log.d(TAG, "isDistracter: " + isDistracter); - } - return isDistracter; - } - - private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion, - final String consideredWord, final float distracterThreshold) { - if (suggestion == null) { - return false; - } - final int suggestionScore = suggestion.mScore; - final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( - consideredWord, suggestion.mWord, suggestionScore); - if (DEBUG) { - Log.d(TAG, "normalizedScore: " + normalizedScore); - Log.d(TAG, "distracterThreshold: " + distracterThreshold); - } - if (normalizedScore > distracterThreshold) { - return true; - } - return false; - } - - private boolean shouldBeLowerCased(final NgramContext ngramContext, final String testedWord, - final Locale locale) { - final DictionaryFacilitator dictionaryFacilitator = - mDictionaryFacilitatorLruCache.get(locale); - if (dictionaryFacilitator.isValidSuggestionWord(testedWord)) { - return false; - } - final String lowerCaseWord = testedWord.toLowerCase(locale); - if (testedWord.equals(lowerCaseWord)) { - return false; - } - if (dictionaryFacilitator.isValidSuggestionWord(lowerCaseWord)) { - return true; - } - if (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST - && !ngramContext.isValid()) { - // TODO: Check beginning-of-sentence. - return true; - } - return false; - } - - @Override - public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, - final Locale locale) { - // TODO: Use this method for user history dictionary. - if (testedWord == null|| locale == null) { - return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */); - } - final boolean shouldBeLowerCased = shouldBeLowerCased(ngramContext, testedWord, locale); - final String caseModifiedWord = shouldBeLowerCased - ? testedWord.toLowerCase(locale) : testedWord; - final boolean isOov = !mDictionaryFacilitatorLruCache.get(locale).isValidSuggestionWord( - caseModifiedWord); - return HandlingType.getHandlingType(shouldBeLowerCased, isOov); - } -} diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java deleted file mode 100644 index 4c99fed9f..000000000 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import java.util.List; -import java.util.Locale; - -import android.view.inputmethod.InputMethodSubtype; - -import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.NgramContext; - -public class DistracterFilterCheckingIsInDictionary implements DistracterFilter { - private final DistracterFilter mDistracterFilter; - private final Dictionary mDictionary; - - public DistracterFilterCheckingIsInDictionary(final DistracterFilter distracterFilter, - final Dictionary dictionary) { - mDistracterFilter = distracterFilter; - mDictionary = dictionary; - } - - @Override - public boolean isDistracterToWordsInDictionaries(NgramContext ngramContext, - String testedWord, Locale locale) { - if (mDictionary.isInDictionary(testedWord)) { - // This filter treats entries that are already in the dictionary as non-distracters - // because they have passed the filtering in the past. - return false; - } - return mDistracterFilter.isDistracterToWordsInDictionaries( - ngramContext, testedWord, locale); - } - - @Override - public int getWordHandlingType(final NgramContext ngramContext, final String testedWord, - final Locale locale) { - return mDistracterFilter.getWordHandlingType(ngramContext, testedWord, locale); - } - - @Override - public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) { - // Do nothing. - } - - @Override - public void close() { - // Do nothing. - } -} diff --git a/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java b/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java index e9a0e7a61..fc0a9cb6c 100644 --- a/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java +++ b/java/src/com/android/inputmethod/latin/utils/WordInputEventForPersonalization.java @@ -23,7 +23,6 @@ import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.common.StringUtils; import com.android.inputmethod.latin.define.DecoderSpecificConstants; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; -import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; import java.util.ArrayList; import java.util.List; @@ -41,17 +40,15 @@ public final class WordInputEventForPersonalization { new int[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; public final boolean[] mIsPrevWordBeginningOfSentenceArray = new boolean[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - public final boolean mIsValid; // Time stamp in seconds. public final int mTimestamp; @UsedForTesting public WordInputEventForPersonalization(final CharSequence targetWord, - final NgramContext ngramContext, final boolean isValid, final int timestamp) { + final NgramContext ngramContext, final int timestamp) { mTargetWord = StringUtils.toCodePointArray(targetWord); mPrevWordsCount = ngramContext.getPrevWordCount(); ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray); - mIsValid = isValid; mTimestamp = timestamp; } @@ -59,8 +56,7 @@ public final class WordInputEventForPersonalization { // objects. public static ArrayList<WordInputEventForPersonalization> createInputEventFrom( final List<String> tokens, final int timestamp, - final SpacingAndPunctuations spacingAndPunctuations, final Locale locale, - final DistracterFilter distracterFilter) { + final SpacingAndPunctuations spacingAndPunctuations, final Locale locale) { final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>(); final int N = tokens.size(); NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; @@ -89,7 +85,7 @@ public final class WordInputEventForPersonalization { } final WordInputEventForPersonalization inputEvent = detectWhetherVaildWordOrNotAndGetInputEvent( - ngramContext, tempWord, timestamp, locale, distracterFilter); + ngramContext, tempWord, timestamp, locale); if (inputEvent == null) { continue; } @@ -101,19 +97,10 @@ public final class WordInputEventForPersonalization { private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent( final NgramContext ngramContext, final String targetWord, final int timestamp, - final Locale locale, final DistracterFilter distracterFilter) { + final Locale locale) { if (locale == null) { return null; } - final int wordHandlingType = distracterFilter.getWordHandlingType(ngramContext, - targetWord, locale); - final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? - targetWord.toLowerCase(locale) : targetWord; - if (distracterFilter.isDistracterToWordsInDictionaries(ngramContext, targetWord, locale)) { - // The word is a distracter. - return null; - } - return new WordInputEventForPersonalization(word, ngramContext, - !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp); + return new WordInputEventForPersonalization(targetWord, ngramContext, timestamp); } } |