diff options
author | 2014-05-08 01:10:33 +0000 | |
---|---|---|
committer | 2014-05-08 01:10:33 +0000 | |
commit | 50b5295d5df9ade819c47717b8d411e3e0880058 (patch) | |
tree | 5a854ac26a72d7df7699cad501c3f4fcc4e972a0 /java/src | |
parent | e53a3825c30bbe2e6ee378edef7c3761472665fe (diff) | |
parent | 17284659ceb82a651261e905a05240fea810c519 (diff) | |
download | latinime-50b5295d5df9ade819c47717b8d411e3e0880058.tar.gz latinime-50b5295d5df9ade819c47717b8d411e3e0880058.tar.xz latinime-50b5295d5df9ade819c47717b8d411e3e0880058.zip |
Merge "Use DistracterFilter when getting Language Model Param of a word"
Diffstat (limited to 'java/src')
-rw-r--r-- | java/src/com/android/inputmethod/latin/utils/DistracterFilter.java | 2 | ||||
-rw-r--r-- | java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java | 26 |
2 files changed, 18 insertions, 10 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index f2a1e524d..48e43d6ef 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -40,7 +40,7 @@ public class DistracterFilter { mKeyboard = keyboard; } - public boolean isDistractorToWordsInDictionaries(final String prevWord, + public boolean isDistracterToWordsInDictionaries(final String prevWord, final String targetWord) { // TODO: to be implemented return false; diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index 5ce977d5e..55061f45f 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -80,7 +80,8 @@ public final class LanguageModelParam { public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( final ArrayList<String> tokens, final int timestamp, final DictionaryFacilitatorForSuggest dictionaryFacilitator, - final SpacingAndPunctuations spacingAndPunctuations) { + final SpacingAndPunctuations spacingAndPunctuations, + final DistracterFilter distracterFilter) { final ArrayList<LanguageModelParam> languageModelParams = CollectionUtils.newArrayList(); final int N = tokens.size(); @@ -109,7 +110,8 @@ public final class LanguageModelParam { } final LanguageModelParam languageModelParam = detectWhetherVaildWordOrNotAndGetLanguageModelParam( - prevWord, tempWord, timestamp, dictionaryFacilitator); + prevWord, tempWord, timestamp, dictionaryFacilitator, + distracterFilter); if (languageModelParam == null) { continue; } @@ -121,27 +123,33 @@ public final class LanguageModelParam { private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( final String prevWord, final String targetWord, final int timestamp, - final DictionaryFacilitatorForSuggest dictionaryFacilitator) { + final DictionaryFacilitatorForSuggest dictionaryFacilitator, + final DistracterFilter distracterFilter) { final Locale locale = dictionaryFacilitator.getLocale(); if (locale == null) { return null; } - if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) { - // OOV word. - return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, - false /* isValidWord */, locale); - } if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, true /* isValidWord */, locale); } + final String lowerCaseTargetWord = targetWord.toLowerCase(locale); if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { // Add the lower-cased word. return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord, timestamp, true /* isValidWord */, locale); } - // Treat the word as an OOV word. + + // Treat the word as an OOV word. The following statement checks whether this OOV + // is a distracter to words in dictionaries. Being a distracter means the OOV word is + // too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not"). + // Adding such a word to dictonaries would interfere with entering in-dictionary words. For + // example, adding "mot" to dictionaries might interfere with entering "not". + // This kind of OOV should be filtered out. + if (distracterFilter.isDistracterToWordsInDictionaries(prevWord, targetWord)) { + return null; + } return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, false /* isValidWord */, locale); } |