aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/utils
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils')
-rw-r--r--java/src/com/android/inputmethod/latin/utils/DistracterFilter.java94
-rw-r--r--java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java29
-rw-r--r--java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java3
-rw-r--r--java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java4
4 files changed, 116 insertions, 14 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
index f2a1e524d..55cbf79b3 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
@@ -17,21 +17,35 @@
package com.android.inputmethod.latin.utils;
import com.android.inputmethod.keyboard.Keyboard;
+import com.android.inputmethod.keyboard.KeyboardSwitcher;
+import com.android.inputmethod.keyboard.MainKeyboardView;
+import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Suggest;
+import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
+import com.android.inputmethod.latin.SuggestedWords;
+import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.WordComposer;
/**
- * This class is used to prevent distracters/misspellings being added to personalization
+ * This class is used to prevent distracters being added to personalization
* or user history dictionaries
*/
public class DistracterFilter {
private final Suggest mSuggest;
private final Keyboard mKeyboard;
+ // If the score of the top suggestion exceeds this value, the tested word (e.g.,
+ // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
+ // words in dictionary. The greater the threshold is, the less likely the tested word would
+ // become a distracter, which means the tested word will be more likely to be added to
+ // the dictionary.
+ private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
+
/**
* Create a DistracterFilter instance.
*
* @param suggest an instance of Suggest which will be used to obtain a list of suggestions
- * for a potential distracter/misspelling
+ * for a potential distracter
* @param keyboard the keyboard that is currently being used. This information is needed
* when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions.
*/
@@ -40,9 +54,79 @@ public class DistracterFilter {
mKeyboard = keyboard;
}
- public boolean isDistractorToWordsInDictionaries(final String prevWord,
- final String targetWord) {
- // TODO: to be implemented
+    /**
+     * Create a DistracterFilter that uses the keyboard currently attached to the main
+     * keyboard view of the given keyboard switcher.
+     *
+     * @param suggest an instance of Suggest, handed unchanged to the DistracterFilter
+     *        constructor.
+     * @param keyboardSwitcher the switcher whose main keyboard view is queried for a keyboard.
+     * @return a new DistracterFilter. Its keyboard is null when the switcher has no main
+     *         keyboard view.
+     */
+    public static DistracterFilter createDistracterFilter(final Suggest suggest,
+            final KeyboardSwitcher keyboardSwitcher) {
+        // TODO: Create Keyboard when mainKeyboardView is null.
+        // TODO: Figure out the most reasonable keyboard for the filter. Refer to the
+        // spellchecker's logic.
+        final MainKeyboardView view = keyboardSwitcher.getMainKeyboardView();
+        final Keyboard currentKeyboard;
+        if (view == null) {
+            currentKeyboard = null;
+        } else {
+            currentKeyboard = view.getKeyboard();
+        }
+        return new DistracterFilter(suggest, currentKeyboard);
+    }
+
+    /**
+     * Check whether a suggestion's normalized score is strictly above a threshold.
+     *
+     * @param suggestion the suggestion to evaluate, may be null.
+     * @param consideredWord the word the suggestion is scored against.
+     * @param distracterThreshold the value the normalized score has to exceed.
+     * @return true if the normalized score computed by BinaryDictionaryUtils is greater than
+     *         distracterThreshold; false otherwise, including when suggestion is null.
+     */
+    private static boolean suggestionExceedsDistracterThreshold(
+            final SuggestedWordInfo suggestion, final String consideredWord,
+            final float distracterThreshold) {
+        // Without a suggestion there is no score to compare.
+        if (suggestion == null) {
+            return false;
+        }
+        final float score = BinaryDictionaryUtils.calcNormalizedScore(
+                consideredWord, suggestion.mWord, suggestion.mScore);
+        if (score > distracterThreshold) {
+            return true;
+        }
return false;
}
+
+ /**
+ * Determine whether a word is a distracter to words in dictionaries.
+ *
+ * @param prevWord the previous word, or null if none.
+ * @param testedWord the word that will be tested to see whether it is a distracter to words
+ * in dictionaries.
+ * @return true if testedWord is a distracter, otherwise false.
+ */
+ public boolean isDistracterToWordsInDictionaries(final String prevWord,
+ final String testedWord) {
+ if (mSuggest == null) {
+ return false;
+ }
+
+ final WordComposer composer = new WordComposer();
+ final int[] codePoints = StringUtils.toCodePointArray(testedWord);
+ final int[] coordinates;
+ if (null == mKeyboard) {
+ coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
+ Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
+ } else {
+ coordinates = mKeyboard.getCoordinates(codePoints);
+ }
+ composer.setComposingWord(codePoints, coordinates, prevWord);
+
+ final int trailingSingleQuotesCount = composer.trailingSingleQuotesCount();
+ final String consideredWord = trailingSingleQuotesCount > 0 ? testedWord.substring(0,
+ testedWord.length() - trailingSingleQuotesCount) : testedWord;
+ final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
+ final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
+ @Override
+ public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
+ if (suggestedWords != null && suggestedWords.size() > 1) {
+ // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
+ // the decoder is at index 1.
+ final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
+ final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
+ firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
+ holder.set(hasStrongDistractor);
+ }
+ }
+ };
+ mSuggest.getSuggestedWords(composer, prevWord, mKeyboard.getProximityInfo(),
+ true /* blockOffensiveWords */, true /* isCorrectionEnbaled */,
+ null /* additionalFeaturesOptions */, 0 /* sessionId */,
+ SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
+
+ return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
index 5ce977d5e..74e7db901 100644
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
@@ -80,7 +80,8 @@ public final class LanguageModelParam {
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
final ArrayList<String> tokens, final int timestamp,
final DictionaryFacilitatorForSuggest dictionaryFacilitator,
- final SpacingAndPunctuations spacingAndPunctuations) {
+ final SpacingAndPunctuations spacingAndPunctuations,
+ final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams =
CollectionUtils.newArrayList();
final int N = tokens.size();
@@ -109,7 +110,8 @@ public final class LanguageModelParam {
}
final LanguageModelParam languageModelParam =
detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- prevWord, tempWord, timestamp, dictionaryFacilitator);
+ prevWord, tempWord, timestamp, dictionaryFacilitator,
+ distracterFilter);
if (languageModelParam == null) {
continue;
}
@@ -121,27 +123,36 @@ public final class LanguageModelParam {
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
final String prevWord, final String targetWord, final int timestamp,
- final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
+ final DictionaryFacilitatorForSuggest dictionaryFacilitator,
+ final DistracterFilter distracterFilter) {
final Locale locale = dictionaryFacilitator.getLocale();
if (locale == null) {
return null;
}
- if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) {
- // OOV word.
- return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
- false /* isValidWord */, locale);
- }
+ // TODO: Even when targetWord is an IV (in-vocabulary) word, distracterFilter should
+ // still be applied in the following code. If targetWord is a distracter,
+ // it should be filtered out.
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
true /* isValidWord */, locale);
}
+
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
// Add the lower-cased word.
return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord,
timestamp, true /* isValidWord */, locale);
}
+
+ // Treat the word as an OOV word. The following statement checks whether this OOV word
+ // is a distracter to words in dictionaries. Being a distracter means the OOV word is
+ // too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not").
+ // Adding such a word to dictionaries would interfere with entering in-dictionary words. For
+ // example, adding "mot" to dictionaries might interfere with entering "not".
+ // This kind of OOV word is filtered out by returning null.
+ if (distracterFilter.isDistracterToWordsInDictionaries(prevWord, targetWord)) {
+ return null;
+ }
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
false /* isValidWord */, locale);
}
diff --git a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java
index a23b3ac79..bf38abc95 100644
--- a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java
+++ b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java
@@ -16,6 +16,8 @@
package com.android.inputmethod.latin.utils;
+import com.android.inputmethod.annotations.UsedForTesting;
+
import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
@@ -74,6 +76,7 @@ public class PrioritizedSerialExecutor {
* Enqueues the given task into the prioritized task queue.
* @param r the enqueued task
*/
+ @UsedForTesting
public void executePrioritized(final Runnable r) {
synchronized(mLock) {
if (!mIsShutdown) {
diff --git a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java
index b37779bdc..938d27122 100644
--- a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java
@@ -324,4 +324,8 @@ public final class SubtypeLocaleUtils {
public static boolean isRtlLanguage(final InputMethodSubtype subtype) {
return isRtlLanguage(getSubtypeLocale(subtype));
}
+
+    /**
+     * Look up the combining rules entry in the extra values of an input method subtype.
+     *
+     * @param subtype the subtype whose extra values are queried.
+     * @return the value the subtype returns for the
+     *         Constants.Subtype.ExtraValue.COMBINING_RULES key.
+     */
+    public static String getCombiningRulesExtraValue(final InputMethodSubtype subtype) {
+        final String combiningRules =
+                subtype.getExtraValueOf(Constants.Subtype.ExtraValue.COMBINING_RULES);
+        return combiningRules;
+    }
}