diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils')
4 files changed, 288 insertions, 171 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java index 702688f93..936219332 100644 --- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java @@ -62,6 +62,22 @@ public final class CapsModeUtils { } /** + * Helper method to find out if a code point is starting punctuation. + * + * This include the Unicode START_PUNCTUATION category, but also some other symbols that are + * starting, like the inverted question mark or the double quote. + * + * @param codePoint the code point + * @return true if it's starting punctuation, false otherwise. + */ + private static boolean isStartPunctuation(final int codePoint) { + return (codePoint == Constants.CODE_DOUBLE_QUOTE || codePoint == Constants.CODE_SINGLE_QUOTE + || codePoint == Constants.CODE_INVERTED_QUESTION_MARK + || codePoint == Constants.CODE_INVERTED_EXCLAMATION_MARK + || Character.getType(codePoint) == Character.START_PUNCTUATION); + } + + /** * Determine what caps mode should be in effect at the current offset in * the text. Only the mode bits set in <var>reqModes</var> will be * checked. Note that the caps mode flags here are explicitly defined @@ -115,8 +131,7 @@ public final class CapsModeUtils { } else { for (i = cs.length(); i > 0; i--) { final char c = cs.charAt(i - 1); - if (c != Constants.CODE_DOUBLE_QUOTE && c != Constants.CODE_SINGLE_QUOTE - && Character.getType(c) != Character.START_PUNCTUATION) { + if (!isStartPunctuation(c)) { break; } } @@ -210,11 +225,14 @@ public final class CapsModeUtils { // We found out that we have a period. We need to determine if this is a full stop or // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation - // looks like (\w\.){2,} + // looks like (\w\.){2,}. Moreover, in German, you put periods after digits for dates + // and some other things, and in German specifically we need to not go into autocaps after + // a whitespace-digits-period sequence. // To find out, we will have a simple state machine with the following states : - // START, WORD, PERIOD, ABBREVIATION + // START, WORD, PERIOD, ABBREVIATION, NUMBER // On START : (just before the first period) // letter => WORD + // digit => NUMBER if German; end with caps otherwise // whitespace => end with no caps (it was a stand-alone period) // otherwise => end with caps (several periods/symbols in a row) // On WORD : (within the word just before the first period) @@ -228,6 +246,11 @@ public final class CapsModeUtils { // letter => LETTER // period => PERIOD // otherwise => end with no caps (it was an abbreviation) + // On NUMBER : (period immediately preceded by one or more digits) + // digit => NUMBER + // letter => LETTER (promote to word) + // otherwise => end with no caps (it was a whitespace-digits-period sequence, + // or a punctuation-digits-period sequence like "11.11.") // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This // should capitalize. @@ -235,6 +258,7 @@ public final class CapsModeUtils { final int WORD = 1; final int PERIOD = 2; final int LETTER = 3; + final int NUMBER = 4; final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES) & reqModes; final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; @@ -247,6 +271,8 @@ public final class CapsModeUtils { state = WORD; } else if (Character.isWhitespace(c)) { return noCaps; + } else if (Character.isDigit(c) && spacingAndPunctuations.mUsesGermanRules) { + state = NUMBER; } else { return caps; } @@ -275,6 +301,15 @@ public final class CapsModeUtils { } else { return noCaps; } + break; + case NUMBER: + if (Character.isLetter(c)) { + state = WORD; + } else if (Character.isDigit(c)) { + state = NUMBER; + } else { + return noCaps; + } } } // Here we arrived at the start of the line. This should behave exactly like whitespace. diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index f1057da0b..6e0fab32a 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -16,129 +16,14 @@ package com.android.inputmethod.latin.utils; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Locale; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import android.content.Context; -import android.content.res.Resources; -import android.text.InputType; -import android.util.Log; -import android.view.inputmethod.EditorInfo; import android.view.inputmethod.InputMethodSubtype; -import com.android.inputmethod.keyboard.Keyboard; -import com.android.inputmethod.keyboard.KeyboardId; -import com.android.inputmethod.keyboard.KeyboardLayoutSet; -import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.PrevWordsInfo; -import com.android.inputmethod.latin.Suggest; -import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback; -import com.android.inputmethod.latin.SuggestedWords; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.WordComposer; - -/** - * This class is used to prevent distracters being added to personalization - * or user history dictionaries - */ -public class DistracterFilter { - private static final String TAG = DistracterFilter.class.getSimpleName(); - - private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120; - - private final Context mContext; - private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap; - private final Map<Locale, Keyboard> mLocaleToKeyboardMap; - private final Suggest mSuggest; - private Keyboard mKeyboard; - - // If the score of the top suggestion exceeds this value, the tested word (e.g., - // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to - // words in dictionary. The greater the threshold is, the less likely the tested word would - // become a distracter, which means the tested word will be more likely to be added to - // the dictionary. - private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f; - - // Create empty distracter filter. - public DistracterFilter() { - this(null, new ArrayList<InputMethodSubtype>()); - } - - /** - * Create a DistracterFilter instance. - * - * @param context the context. - * @param enabledSubtypes the enabled subtypes. - */ - public DistracterFilter(final Context context, final List<InputMethodSubtype> enabledSubtypes) { - mContext = context; - mLocaleToSubtypeMap = new HashMap<>(); - if (enabledSubtypes != null) { - for (final InputMethodSubtype subtype : enabledSubtypes) { - final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype); - if (mLocaleToSubtypeMap.containsKey(locale)) { - // Multiple subtypes are enabled for one locale. - // TODO: Investigate what we should do for this case. - continue; - } - mLocaleToSubtypeMap.put(locale, subtype); - } - } - mLocaleToKeyboardMap = new HashMap<>(); - mSuggest = new Suggest(); - mKeyboard = null; - } - - private static boolean suggestionExceedsDistracterThreshold( - final SuggestedWordInfo suggestion, final String consideredWord, - final float distracterThreshold) { - if (null != suggestion) { - final int suggestionScore = suggestion.mScore; - final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( - consideredWord, suggestion.mWord, suggestionScore); - if (normalizedScore > distracterThreshold) { - return true; - } - } - return false; - } - - private void loadKeyboardForLocale(final Locale newLocale) { - final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale); - if (cachedKeyboard != null) { - mKeyboard = cachedKeyboard; - return; - } - final InputMethodSubtype subtype = mLocaleToSubtypeMap.get(newLocale); - if (subtype == null) { - return; - } - final EditorInfo editorInfo = new EditorInfo(); - editorInfo.inputType = InputType.TYPE_CLASS_TEXT; - final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder( - mContext, editorInfo); - final Resources res = mContext.getResources(); - final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res); - final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res); - builder.setKeyboardGeometry(keyboardWidth, keyboardHeight); - builder.setSubtype(subtype); - builder.setIsSpellChecker(false /* isSpellChecker */); - final KeyboardLayoutSet layoutSet = builder.build(); - mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET); - } - - private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException { - mSuggest.mDictionaryFacilitator.resetDictionaries(mContext, newlocale, - false /* useContactsDict */, false /* usePersonalizedDicts */, - false /* forceReloadMainDictionary */, null /* listener */); - mSuggest.mDictionaryFacilitator.waitForLoadingMainDictionary( - TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS); - } +public interface DistracterFilter { /** * Determine whether a word is a distracter to words in dictionaries. * @@ -149,56 +34,25 @@ public class DistracterFilter { * @return true if testedWord is a distracter, otherwise false. */ public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, - final String testedWord, final Locale locale) { - if (locale == null) { - return false; - } - if (!locale.equals(mSuggest.mDictionaryFacilitator.getLocale())) { - if (!mLocaleToSubtypeMap.containsKey(locale)) { - Log.e(TAG, "Locale " + locale + " is not enabled."); - // TODO: Investigate what we should do for disabled locales. - return false; - } - loadKeyboardForLocale(locale); - // Reset dictionaries for the locale. - try { - loadDictionariesForLocale(locale); - } catch (final InterruptedException e) { - Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter", e); - return false; - } - } - if (mKeyboard == null) { + final String testedWord, final Locale locale); + + public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes); + + public void close(); + + public static final class EmptyDistracterFilter implements DistracterFilter { + @Override + public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo, + String testedWord, Locale locale) { return false; } - final WordComposer composer = new WordComposer(); - final int[] codePoints = StringUtils.toCodePointArray(testedWord); - final int[] coordinates = mKeyboard.getCoordinates(codePoints); - composer.setComposingWord(codePoints, coordinates, prevWordsInfo); - final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord); - final String consideredWord = trailingSingleQuotesCount > 0 ? - testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) : - testedWord; - final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>(); - final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() { - @Override - public void onGetSuggestedWords(final SuggestedWords suggestedWords) { - if (suggestedWords != null && suggestedWords.size() > 1) { - // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from - // the decoder is at index 1. - final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1); - final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold( - firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); - holder.set(hasStrongDistractor); - } - } - }; - mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(), - true /* blockOffensiveWords */, true /* isCorrectionEnbaled */, - null /* additionalFeaturesOptions */, 0 /* sessionId */, - SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback); + @Override + public void close() { + } - return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT); + @Override + public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) { + } } } diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java new file mode 100644 index 000000000..92033b76f --- /dev/null +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUsingSuggestion.java @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.utils; + +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import android.content.Context; +import android.content.res.Resources; +import android.text.InputType; +import android.util.Log; +import android.view.inputmethod.EditorInfo; +import android.view.inputmethod.InputMethodSubtype; + +import com.android.inputmethod.keyboard.Keyboard; +import com.android.inputmethod.keyboard.KeyboardId; +import com.android.inputmethod.keyboard.KeyboardLayoutSet; +import com.android.inputmethod.latin.Constants; +import com.android.inputmethod.latin.DictionaryFacilitator; +import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.Suggest; +import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback; +import com.android.inputmethod.latin.SuggestedWords; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.WordComposer; + +/** + * This class is used to prevent distracters being added to personalization + * or user history dictionaries + */ +public class DistracterFilterUsingSuggestion implements DistracterFilter { + private static final String TAG = DistracterFilterUsingSuggestion.class.getSimpleName(); + + private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120; + + private final Context mContext; + private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap; + private final Map<Locale, Keyboard> mLocaleToKeyboardMap; + private final DictionaryFacilitator mDictionaryFacilitator; + private final Suggest mSuggest; + private Keyboard mKeyboard; + private final Object mLock = new Object(); + + // If the score of the top suggestion exceeds this value, the tested word (e.g., + // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to + // words in dictionary. The greater the threshold is, the less likely the tested word would + // become a distracter, which means the tested word will be more likely to be added to + // the dictionary. + private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f; + + /** + * Create a DistracterFilter instance. + * + * @param context the context. + */ + public DistracterFilterUsingSuggestion(final Context context) { + mContext = context; + mLocaleToSubtypeMap = new HashMap<>(); + mLocaleToKeyboardMap = new HashMap<>(); + mDictionaryFacilitator = new DictionaryFacilitator(); + mSuggest = new Suggest(mDictionaryFacilitator); + mKeyboard = null; + } + + @Override + public void close() { + mDictionaryFacilitator.closeDictionaries(); + } + + @Override + public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { + final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>(); + if (enabledSubtypes != null) { + for (final InputMethodSubtype subtype : enabledSubtypes) { + final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype); + if (newLocaleToSubtypeMap.containsKey(locale)) { + // Multiple subtypes are enabled for one locale. + // TODO: Investigate what we should do for this case. + continue; + } + newLocaleToSubtypeMap.put(locale, subtype); + } + } + if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) { + // Enabled subtypes have not been changed. + return; + } + synchronized (mLock) { + mLocaleToSubtypeMap.clear(); + mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap); + mLocaleToKeyboardMap.clear(); + } + } + + private static boolean suggestionExceedsDistracterThreshold( + final SuggestedWordInfo suggestion, final String consideredWord, + final float distracterThreshold) { + if (null != suggestion) { + final int suggestionScore = suggestion.mScore; + final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( + consideredWord, suggestion.mWord, suggestionScore); + if (normalizedScore > distracterThreshold) { + return true; + } + } + return false; + } + + private void loadKeyboardForLocale(final Locale newLocale) { + final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale); + if (cachedKeyboard != null) { + mKeyboard = cachedKeyboard; + return; + } + final InputMethodSubtype subtype; + synchronized (mLock) { + subtype = mLocaleToSubtypeMap.get(newLocale); + } + if (subtype == null) { + return; + } + final EditorInfo editorInfo = new EditorInfo(); + editorInfo.inputType = InputType.TYPE_CLASS_TEXT; + final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder( + mContext, editorInfo); + final Resources res = mContext.getResources(); + final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res); + final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res); + builder.setKeyboardGeometry(keyboardWidth, keyboardHeight); + builder.setSubtype(subtype); + builder.setIsSpellChecker(false /* isSpellChecker */); + final KeyboardLayoutSet layoutSet = builder.build(); + mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET); + } + + private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException { + mDictionaryFacilitator.resetDictionaries(mContext, newlocale, + false /* useContactsDict */, false /* usePersonalizedDicts */, + false /* forceReloadMainDictionary */, null /* listener */); + mDictionaryFacilitator.waitForLoadingMainDictionary( + TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS); + } + + /** + * Determine whether a word is a distracter to words in dictionaries. + * + * @param prevWordsInfo the information of previous words. + * @param testedWord the word that will be tested to see whether it is a distracter to words + * in dictionaries. + * @param locale the locale of word. + * @return true if testedWord is a distracter, otherwise false. + */ + @Override + public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, + final String testedWord, final Locale locale) { + if (locale == null) { + return false; + } + if (!locale.equals(mDictionaryFacilitator.getLocale())) { + synchronized (mLock) { + if (!mLocaleToSubtypeMap.containsKey(locale)) { + Log.e(TAG, "Locale " + locale + " is not enabled."); + // TODO: Investigate what we should do for disabled locales. + return false; + } + loadKeyboardForLocale(locale); + // Reset dictionaries for the locale. + try { + loadDictionariesForLocale(locale); + } catch (final InterruptedException e) { + Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter", + e); + return false; + } + } + } + if (mKeyboard == null) { + return false; + } + final WordComposer composer = new WordComposer(); + final int[] codePoints = StringUtils.toCodePointArray(testedWord); + final int[] coordinates = mKeyboard.getCoordinates(codePoints); + composer.setComposingWord(codePoints, coordinates, prevWordsInfo); + + final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord); + final String consideredWord = trailingSingleQuotesCount > 0 ? + testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) : + testedWord; + final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>(); + final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() { + @Override + public void onGetSuggestedWords(final SuggestedWords suggestedWords) { + if (suggestedWords != null && suggestedWords.size() > 1) { + // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from + // the decoder is at index 1. + final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1); + final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold( + firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); + holder.set(hasStrongDistractor); + } + } + }; + mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(), + true /* blockOffensiveWords */, true /* isCorrectionEnbaled */, + null /* additionalFeaturesOptions */, 0 /* sessionId */, + SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback); + + return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT); + } +} diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index aaf4a4064..430efdd19 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -19,11 +19,12 @@ package com.android.inputmethod.latin.utils; import android.util.Log; import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.DictionaryFacilitatorForSuggest; +import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import java.util.ArrayList; +import java.util.List; import java.util.Locale; // Note: this class is used as a parameter type of a native method. You should be careful when you @@ -79,8 +80,8 @@ public final class LanguageModelParam { // Process a list of words and return a list of {@link LanguageModelParam} objects. public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( - final ArrayList<String> tokens, final int timestamp, - final DictionaryFacilitatorForSuggest dictionaryFacilitator, + final List<String> tokens, final int timestamp, + final DictionaryFacilitator dictionaryFacilitator, final SpacingAndPunctuations spacingAndPunctuations, final DistracterFilter distracterFilter) { final ArrayList<LanguageModelParam> languageModelParams = @@ -124,7 +125,7 @@ public final class LanguageModelParam { private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, - final DictionaryFacilitatorForSuggest dictionaryFacilitator, + final DictionaryFacilitator dictionaryFacilitator, final DistracterFilter distracterFilter) { final Locale locale = dictionaryFacilitator.getLocale(); if (locale == null) { |