about | summary | refs | log | tree | commit | diff | stats
path: root/java/src/com/android/inputmethod
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-09-10 03:49:09 +0000
committer Android (Google) Code Review <android-gerrit@google.com> 2014-09-10 03:49:10 +0000
commit 934e1d8087e8988a109b03f00d7d2a9ceedb3f20 (patch)
tree 32d5334b8ae55615dc99f09903210fc50d656e8a /java/src/com/android/inputmethod
parent 63ee10d63a67fd8e2de3047c4a8de1dbac0c9fc1 (diff)
parent d8430811935dd054f7aee87ceba532c0b30fb60e (diff)
download latinime-934e1d8087e8988a109b03f00d7d2a9ceedb3f20.tar.gz
latinime-934e1d8087e8988a109b03f00d7d2a9ceedb3f20.tar.xz
latinime-934e1d8087e8988a109b03f00d7d2a9ceedb3f20.zip
Merge "Move case and OOV detection logic into distracter filter."
Diffstat (limited to 'java/src/com/android/inputmethod')
-rw-r--r-- java/src/com/android/inputmethod/latin/DictionaryFacilitator.java 18
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/DistracterFilter.java 34
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java 54
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java 6
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java 53
5 files changed, 103 insertions, 62 deletions
diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
index fde94da93..47aaeadcc 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java
@@ -60,7 +60,6 @@ public class DictionaryFacilitator {
// HACK: This threshold is being used when adding a capitalized entry in the User History
// dictionary.
private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140;
- private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
private DictionaryGroup mDictionaryGroup = new DictionaryGroup();
private boolean mIsUserDictEnabled = false;
@@ -68,7 +67,6 @@ public class DictionaryFacilitator {
// To synchronize assigning mDictionaryGroup to ensure closing dictionaries.
private final Object mLock = new Object();
private final DistracterFilter mDistracterFilter;
- private final DictionaryFacilitatorLruCache mFacilitatorCacheForPersonalization;
private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS =
new String[] {
@@ -176,14 +174,10 @@ public class DictionaryFacilitator {
public DictionaryFacilitator() {
mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER;
- mFacilitatorCacheForPersonalization = null;
}
public DictionaryFacilitator(final Context context) {
- mFacilitatorCacheForPersonalization = new DictionaryFacilitatorLruCache(context,
- MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
- mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
- mFacilitatorCacheForPersonalization);
+ mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
}
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
@@ -358,9 +352,6 @@ public class DictionaryFacilitator {
for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) {
dictionaryGroup.closeDict(dictType);
}
- if (mFacilitatorCacheForPersonalization != null) {
- mFacilitatorCacheForPersonalization.evictAll();
- }
mDistracterFilter.close();
}
@@ -609,14 +600,11 @@ public class DictionaryFacilitator {
}
// TODO: Get locale from personalizationDataChunk.mDetectedLanguage.
final Locale dataChunkLocale = getLocale();
- final DictionaryFacilitator dictionaryFacilitatorForLocale =
- mFacilitatorCacheForPersonalization.get(dataChunkLocale);
final ArrayList<LanguageModelParam> languageModelParams =
LanguageModelParam.createLanguageModelParamsFrom(
personalizationDataChunk.mTokens,
- personalizationDataChunk.mTimestampInSeconds,
- dictionaryFacilitatorForLocale, spacingAndPunctuations,
- new DistracterFilterCheckingIsInDictionary(
+ personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations,
+ dataChunkLocale, new DistracterFilterCheckingIsInDictionary(
mDistracterFilter, personalizationDict));
if (languageModelParams == null || languageModelParams.isEmpty()) {
if (callback != null) {
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
index 787e4a59d..94c62429e 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java
@@ -36,10 +36,38 @@ public interface DistracterFilter {
public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
final String testedWord, final Locale locale);
+ public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
+ final Locale locale);
+
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes);
public void close();
+ public static final class HandlingType {
+ private final static int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0;
+ private final static int SHOULD_BE_LOWER_CASED = 0x1;
+ private final static int SHOULD_BE_HANDLED_AS_OOV = 0x2;
+
+ public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) {
+ int wordHandlingType = HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS;
+ if (shouldBeLowerCased) {
+ wordHandlingType |= HandlingType.SHOULD_BE_LOWER_CASED;
+ }
+ if (isOov) {
+ wordHandlingType |= HandlingType.SHOULD_BE_HANDLED_AS_OOV;
+ }
+ return wordHandlingType;
+ }
+
+ public static boolean shouldBeLowerCased(final int handlingType) {
+ return (handlingType & SHOULD_BE_LOWER_CASED) != 0;
+ }
+
+ public static boolean shouldBeHandledAsOov(final int handlingType) {
+ return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0;
+ }
+ };
+
public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() {
@Override
public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo,
@@ -48,6 +76,12 @@ public interface DistracterFilter {
}
@Override
+ public int getWordHandlingType(final PrevWordsInfo prevWordsInfo,
+ final String testedWord, final Locale locale) {
+ return HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS;
+ }
+
+ @Override
public void close() {
}
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java
index e10571e4a..1db525502 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java
@@ -51,6 +51,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName();
private static final boolean DEBUG = false;
+ private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024;
private final Context mContext;
@@ -73,15 +74,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
* Create a DistracterFilter instance.
*
* @param context the context.
- * @param dictionaryFacilitatorLruCache the cache of dictionaryFacilitators that are used for
- * checking distracters.
*/
- public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context,
- final DictionaryFacilitatorLruCache dictionaryFacilitatorLruCache) {
+ public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) {
mContext = context;
mLocaleToSubtypeCache = new ConcurrentHashMap<>();
mLocaleToKeyboardCache = new ConcurrentHashMap<>();
- mDictionaryFacilitatorLruCache = dictionaryFacilitatorLruCache;
+ mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context,
+ MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
}
@@ -89,7 +88,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
public void close() {
mLocaleToSubtypeCache.clear();
mLocaleToKeyboardCache.clear();
- mDistractersCache.evictAll();
+ mDictionaryFacilitatorLruCache.evictAll();
+ // Don't clear mDistractersCache.
}
@Override
@@ -194,9 +194,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
mDistractersCache.put(cacheKey, Boolean.TRUE);
return true;
}
- final boolean isValidWord = dictionaryFacilitator.isValidWord(testedWord,
- false /* ignoreCase */);
- if (isValidWord) {
+ final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */);
+ if (Word) {
// Valid word is not a distractor.
if (DEBUG) {
Log.d(TAG, "isDistracter: false (valid word)");
@@ -283,4 +282,41 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
}
return false;
}
+
+ private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord,
+ final Locale locale) {
+ final DictionaryFacilitator dictionaryFacilitator =
+ mDictionaryFacilitatorLruCache.get(locale);
+ if (dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */)) {
+ return false;
+ }
+ final String lowerCaseTargetWord = testedWord.toLowerCase(locale);
+ if (testedWord.equals(lowerCaseTargetWord)) {
+ return false;
+ }
+ if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
+ return true;
+ }
+ if (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST
+ && !prevWordsInfo.isValid()) {
+ // TODO: Check beginning-of-sentence.
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
+ final Locale locale) {
+ // TODO: Use this method for user history dictionary.
+ if (testedWord == null|| locale == null) {
+ return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */);
+ }
+ final boolean shouldBeLowerCased = shouldBeLowerCased(prevWordsInfo, testedWord, locale);
+ final String caseModifiedWord =
+ shouldBeLowerCased ? testedWord.toLowerCase(locale) : testedWord;
+ final boolean isOov = !mDictionaryFacilitatorLruCache.get(locale).isValidWord(
+ caseModifiedWord, false /* ignoreCase */);
+ return HandlingType.getHandlingType(shouldBeLowerCased, isOov);
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java
index 4ad4ba784..349236f18 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java
@@ -48,6 +48,12 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter
}
@Override
+ public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
+ final Locale locale) {
+ return mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale);
+ }
+
+ @Override
public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) {
// Do nothing.
}
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
index fbce3f2fd..05d124764 100644
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
+import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
import java.util.ArrayList;
import java.util.List;
@@ -81,8 +82,7 @@ public final class LanguageModelParam {
// Process a list of words and return a list of {@link LanguageModelParam} objects.
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
final List<String> tokens, final int timestamp,
- final DictionaryFacilitator dictionaryFacilitator,
- final SpacingAndPunctuations spacingAndPunctuations,
+ final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
final int N = tokens.size();
@@ -111,8 +111,7 @@ public final class LanguageModelParam {
}
final LanguageModelParam languageModelParam =
detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- prevWordsInfo, tempWord, timestamp, dictionaryFacilitator,
- distracterFilter);
+ prevWordsInfo, tempWord, timestamp, locale, distracterFilter);
if (languageModelParam == null) {
continue;
}
@@ -125,47 +124,25 @@ public final class LanguageModelParam {
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
- final DictionaryFacilitator dictionaryFacilitator,
- final DistracterFilter distracterFilter) {
- final Locale locale = dictionaryFacilitator.getLocale();
+ final Locale locale, final DistracterFilter distracterFilter) {
if (locale == null) {
return null;
}
- if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
- return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
- true /* isValidWord */, locale, distracterFilter);
- }
-
- final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
- if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
- // Add the lower-cased word.
- return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord,
- timestamp, true /* isValidWord */, locale, distracterFilter);
+ final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo,
+ targetWord, locale);
+ final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
+ targetWord.toLowerCase(locale) : targetWord;
+ if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) {
+ // The word is a distracter.
+ return null;
}
-
- // Treat the word as an OOV word.
- return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
- false /* isValidWord */, locale, distracterFilter);
+ return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp,
+ !HandlingType.shouldBeHandledAsOov(wordHandlingType));
}
private static LanguageModelParam createAndGetLanguageModelParamOfWord(
- final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
- final boolean isValidWord, final Locale locale,
- final DistracterFilter distracterFilter) {
- final String word;
- if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
- && !prevWordsInfo.isValid() && !isValidWord) {
- word = targetWord.toLowerCase(locale);
- } else {
- word = targetWord;
- }
- // Check whether the word is a distracter to words in the dictionaries.
- if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) {
- if (DEBUG) {
- Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word.");
- }
- return null;
- }
+ final PrevWordsInfo prevWordsInfo, final String word, final int timestamp,
+ final boolean isValidWord) {
final int unigramProbability = isValidWord ?
UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
if (!prevWordsInfo.isValid()) {