about | summary | refs | log | tree | commit | diff | stats
path: root/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
diff options
context:
space:
mode:
author: Xiaojun Bi <bxj@google.com> 2014-04-24 13:31:19 -0700
committer: Xiaojun Bi <bxj@google.com> 2014-05-07 10:10:18 -0700
commit: 17284659ceb82a651261e905a05240fea810c519 (patch)
tree: 770df5a815aafcaf6f48136c4207dc3f73c67cf6 /java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
parent: 0de529b4cac458b2d561060d3ae2c7f7f82b676d (diff)
downloadlatinime-17284659ceb82a651261e905a05240fea810c519.tar.gz
latinime-17284659ceb82a651261e905a05240fea810c519.tar.xz
latinime-17284659ceb82a651261e905a05240fea810c519.zip
Use DistracterFilter when getting Language Model Param of a word
A preparation for implementing Distracter Filter. This CL should be checked in together with Icfefa0463180.

Bug: 13142176
Change-Id: If9e5ca840c4a81cb0339ab70f146b42fbfa7af5d
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java')
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java | 26
1 files changed, 17 insertions, 9 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
index 5ce977d5e..55061f45f 100644
--- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
+++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java
@@ -80,7 +80,8 @@ public final class LanguageModelParam {
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
final ArrayList<String> tokens, final int timestamp,
final DictionaryFacilitatorForSuggest dictionaryFacilitator,
- final SpacingAndPunctuations spacingAndPunctuations) {
+ final SpacingAndPunctuations spacingAndPunctuations,
+ final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams =
CollectionUtils.newArrayList();
final int N = tokens.size();
@@ -109,7 +110,8 @@ public final class LanguageModelParam {
}
final LanguageModelParam languageModelParam =
detectWhetherVaildWordOrNotAndGetLanguageModelParam(
- prevWord, tempWord, timestamp, dictionaryFacilitator);
+ prevWord, tempWord, timestamp, dictionaryFacilitator,
+ distracterFilter);
if (languageModelParam == null) {
continue;
}
@@ -121,27 +123,33 @@ public final class LanguageModelParam {
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
final String prevWord, final String targetWord, final int timestamp,
- final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
+ final DictionaryFacilitatorForSuggest dictionaryFacilitator,
+ final DistracterFilter distracterFilter) {
final Locale locale = dictionaryFacilitator.getLocale();
if (locale == null) {
return null;
}
- if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) {
- // OOV word.
- return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
- false /* isValidWord */, locale);
- }
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
true /* isValidWord */, locale);
}
+
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
// Add the lower-cased word.
return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord,
timestamp, true /* isValidWord */, locale);
}
- // Treat the word as an OOV word.
+
+ // Treat the word as an OOV word. The following statement checks whether this OOV
+ // is a distracter to words in dictionaries. Being a distracter means the OOV word is
+ // too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not").
+ // Adding such a word to dictionaries would interfere with entering in-dictionary words. For
+ // example, adding "mot" to dictionaries might interfere with entering "not".
+ // This kind of OOV should be filtered out.
+ if (distracterFilter.isDistracterToWordsInDictionaries(prevWord, targetWord)) {
+ return null;
+ }
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
false /* isValidWord */, locale);
}