diff options
author | 2014-04-24 13:31:19 -0700 | |
---|---|---|
committer | 2014-05-07 10:10:18 -0700 | |
commit | 17284659ceb82a651261e905a05240fea810c519 (patch) | |
tree | 770df5a815aafcaf6f48136c4207dc3f73c67cf6 /java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java | |
parent | 0de529b4cac458b2d561060d3ae2c7f7f82b676d (diff) | |
download | latinime-17284659ceb82a651261e905a05240fea810c519.tar.gz latinime-17284659ceb82a651261e905a05240fea810c519.tar.xz latinime-17284659ceb82a651261e905a05240fea810c519.zip |
Use DistracterFilter when getting Language Model Param of a word
A preparation for implementing Distracter Filter.
This CL should be checked in together with Icfefa0463180
Bug: 13142176
Change-Id: If9e5ca840c4a81cb0339ab70f146b42fbfa7af5d
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java | 26 |
1 files changed, 17 insertions, 9 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index 5ce977d5e..55061f45f 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -80,7 +80,8 @@ public final class LanguageModelParam { public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( final ArrayList<String> tokens, final int timestamp, final DictionaryFacilitatorForSuggest dictionaryFacilitator, - final SpacingAndPunctuations spacingAndPunctuations) { + final SpacingAndPunctuations spacingAndPunctuations, + final DistracterFilter distracterFilter) { final ArrayList<LanguageModelParam> languageModelParams = CollectionUtils.newArrayList(); final int N = tokens.size(); @@ -109,7 +110,8 @@ public final class LanguageModelParam { } final LanguageModelParam languageModelParam = detectWhetherVaildWordOrNotAndGetLanguageModelParam( - prevWord, tempWord, timestamp, dictionaryFacilitator); + prevWord, tempWord, timestamp, dictionaryFacilitator, + distracterFilter); if (languageModelParam == null) { continue; } @@ -121,27 +123,33 @@ public final class LanguageModelParam { private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( final String prevWord, final String targetWord, final int timestamp, - final DictionaryFacilitatorForSuggest dictionaryFacilitator) { + final DictionaryFacilitatorForSuggest dictionaryFacilitator, + final DistracterFilter distracterFilter) { final Locale locale = dictionaryFacilitator.getLocale(); if (locale == null) { return null; } - if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) { - // OOV word. 
- return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, - false /* isValidWord */, locale); - } if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, true /* isValidWord */, locale); } + final String lowerCaseTargetWord = targetWord.toLowerCase(locale); if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { // Add the lower-cased word. return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord, timestamp, true /* isValidWord */, locale); } - // Treat the word as an OOV word. + + // Treat the word as an OOV word. The following statement checks whether this OOV + // is a distracter to words in dictionaries. Being a distracter means the OOV word is + // too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not"). + // Adding such a word to dictionaries would interfere with entering in-dictionary words. For + // example, adding "mot" to dictionaries might interfere with entering "not". + // This kind of OOV should be filtered out. + if (distracterFilter.isDistracterToWordsInDictionaries(prevWord, targetWord)) { + return null; + } return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, false /* isValidWord */, locale); } |