diff options
52 files changed, 1026 insertions, 323 deletions
diff --git a/java-overridable/src/com/android/inputmethod/latin/define/ProductionFlags.java b/java-overridable/src/com/android/inputmethod/latin/define/ProductionFlags.java index 5ab126486..c22c5770f 100644 --- a/java-overridable/src/com/android/inputmethod/latin/define/ProductionFlags.java +++ b/java-overridable/src/com/android/inputmethod/latin/define/ProductionFlags.java @@ -40,4 +40,9 @@ public final class ProductionFlags { * When false, the metrics logging is not yet ready to be enabled. */ public static final boolean IS_METRICS_LOGGING_SUPPORTED = false; + + /** + * When {@code false}, the split keyboard is not yet ready to be enabled. + */ + public static final boolean IS_SPLIT_KEYBOARD_SUPPORTED = true; } diff --git a/java-overridable/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdater.java b/java-overridable/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdater.java index c97a0d232..8b66cff53 100644 --- a/java-overridable/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdater.java +++ b/java-overridable/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdater.java @@ -16,8 +16,6 @@ package com.android.inputmethod.latin.personalization; -import java.util.Locale; - import android.content.Context; import com.android.inputmethod.latin.DictionaryFacilitator; @@ -33,12 +31,7 @@ public class PersonalizationDictionaryUpdater { mDictionaryFacilitator = dictionaryFacilitator; } - public Locale getLocale() { - return null; - } - - public void onLoadSettings(final boolean usePersonalizedDicts, - final boolean isSystemLocaleSameAsLocaleOfAllEnabledSubtypesOfEnabledImes) { + public void onLoadSettings(final boolean usePersonalizedDicts) { if (!mDictCleared) { // Clear and never update the personalization dictionary. PersonalizationHelper.removeAllPersonalizationDictionaries(mContext); diff --git a/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java b/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java index c867fee33..38735eccb 100644 --- a/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java +++ b/java-overridable/src/com/android/inputmethod/latin/utils/StatsUtils.java @@ -22,12 +22,45 @@ import com.android.inputmethod.latin.settings.SettingsValues; public final class StatsUtils { + private StatsUtils() { + // Intentional empty constructor. + } + public static void onCreate(final SettingsValues settingsValues, RichInputMethodManager richImm) { } public static void onPickSuggestionManually(final SuggestedWords suggestedWords, final SuggestedWords.SuggestedWordInfo suggestionInfo) { + } + + public static void onBackspaceWordDelete(int wordLength) { + } + + public static void onBackspacePressed(int lengthToDelete) { + } + + public static void onBackspaceSelectedText(int selectedTextLength) { + } + + public static void onDeleteMultiCharInput(int multiCharLength) { + } + + public static void onRevertAutoCorrect() { + } + + public static void onRevertDoubleSpacePeriod() { + } + + public static void onRevertSwapPunctuation() { + } + + public static void onFinishInputView() { + } + + public static void onCreateInputView() { + } + public static void onStartInputView(int inputType, int displayOrientation, boolean restarting) { } } diff --git a/java/res/values/attrs.xml b/java/res/values/attrs.xml index 2e81bdf48..f1253b40c 100644 --- a/java/res/values/attrs.xml +++ b/java/res/values/attrs.xml @@ -338,6 +338,8 @@ <!-- If true, use functionalTextColor instead of ketTextColor to drawing the label on the key --> <flag name="followFunctionalTextColor" value="0x80000" /> + <!-- Keep aspect ratio of key background. --> + <flag name="keepBackgroundAspectRatio" value="0x100000" /> <!-- If true, disable keyHintLabel. --> <flag name="disableKeyHintLabel" value="0x40000000" /> <!-- If true, disable additionalMoreKeys. --> diff --git a/java/res/values/themes-lxx-dark.xml b/java/res/values/themes-lxx-dark.xml index 76e9d33bf..5b2681359 100644 --- a/java/res/values/themes-lxx-dark.xml +++ b/java/res/values/themes-lxx-dark.xml @@ -114,6 +114,7 @@ <item name="android:background">@android:color/transparent</item> <item name="keyBackground">@drawable/btn_keyboard_key_popup_action_lxx_dark</item> <item name="divider">@null</item> + <item name="keyLabelFlags">keepBackgroundAspectRatio</item> </style> <style name="SuggestionStripView.LXX_Dark" diff --git a/java/res/values/themes-lxx-light.xml b/java/res/values/themes-lxx-light.xml index 5cd84171d..f607807c8 100644 --- a/java/res/values/themes-lxx-light.xml +++ b/java/res/values/themes-lxx-light.xml @@ -114,6 +114,7 @@ <item name="android:background">@android:color/transparent</item> <item name="keyBackground">@drawable/btn_keyboard_key_popup_action_lxx_light</item> <item name="divider">@null</item> + <item name="keyLabelFlags">keepBackgroundAspectRatio</item> </style> <style name="SuggestionStripView.LXX_Light" diff --git a/java/res/xml-sw600dp/key_styles_enter.xml b/java/res/xml-sw600dp/key_styles_enter.xml index d066d2d12..63ef2f8f9 100644 --- a/java/res/xml-sw600dp/key_styles_enter.xml +++ b/java/res/xml-sw600dp/key_styles_enter.xml @@ -80,13 +80,27 @@ </default> </switch> <!-- Enter key style --> - <key-style - latin:styleName="defaultEnterKeyStyle" - latin:keySpec="!icon/enter_key|!code/key_enter" - latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio|followFunctionalTextColor" - latin:keyActionFlags="noKeyPreview" - latin:backgroundType="action" - latin:parentStyle="navigateMoreKeysStyle" /> + <switch> + <case latin:keyboardTheme="ICS|KLP"> + <key-style + latin:styleName="defaultEnterKeyStyle" + latin:keySpec="!icon/enter_key|!code/key_enter" + latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio|followFunctionalTextColor" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="action" + latin:parentStyle="navigateMoreKeysStyle" /> + </case> + <!-- keyboardTheme="LXXLight|LXXDark" --> + <default> + <key-style + latin:styleName="defaultEnterKeyStyle" + latin:keySpec="!icon/enter_key|!code/key_enter" + latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio|followFunctionalTextColor|keepBackgroundAspectRatio" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="action" + latin:parentStyle="navigateMoreKeysStyle" /> + </default> + </switch> <include latin:keyboardLayout="@xml/key_styles_actions" /> <switch> <!-- Shift + Enter in textMultiLine field. --> diff --git a/java/res/xml/key_styles_common.xml b/java/res/xml/key_styles_common.xml index 43ee26b07..b36ddf236 100644 --- a/java/res/xml/key_styles_common.xml +++ b/java/res/xml/key_styles_common.xml @@ -80,11 +80,24 @@ latin:keyActionFlags="isRepeatable|noKeyPreview" latin:backgroundType="functional" /> <!-- emojiKeyStyle must be defined before including @xml/key_syles_enter. --> - <key-style - latin:styleName="emojiKeyStyle" - latin:keySpec="!icon/emoji_action_key|!code/key_emoji" - latin:keyActionFlags="noKeyPreview" - latin:backgroundType="action" /> + <switch> + <case latin:keyboardTheme="ICS|KLP"> + <key-style + latin:styleName="emojiKeyStyle" + latin:keySpec="!icon/emoji_action_key|!code/key_emoji" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="action" /> + </case> + <!-- keyboardTheme="LXXLight|LXXDark" --> + <default> + <key-style + latin:styleName="emojiKeyStyle" + latin:keySpec="!icon/emoji_action_key|!code/key_emoji" + latin:keyLabelFlags="keepBackgroundAspectRatio" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="action" /> + </default> + </switch> <include latin:keyboardLayout="@xml/key_styles_enter" /> <!-- TODO: Currently there is no way to specify icon alignment per theme. --> diff --git a/java/res/xml/key_styles_enter.xml b/java/res/xml/key_styles_enter.xml index d6d01b862..564f465e9 100644 --- a/java/res/xml/key_styles_enter.xml +++ b/java/res/xml/key_styles_enter.xml @@ -212,13 +212,27 @@ </default> </switch> <!-- Enter key style --> - <key-style - latin:styleName="defaultEnterKeyStyle" - latin:keySpec="!icon/enter_key|!code/key_enter" - latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio|followFunctionalTextColor" - latin:keyActionFlags="noKeyPreview" - latin:backgroundType="action" - latin:parentStyle="navigateMoreKeysStyle" /> + <switch> + <case latin:keyboardTheme="ICS|KLP"> + <key-style + latin:styleName="defaultEnterKeyStyle" + latin:keySpec="!icon/enter_key|!code/key_enter" + latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio|followFunctionalTextColor" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="action" + latin:parentStyle="navigateMoreKeysStyle" /> + </case> + <!-- keyboardTheme="LXXLight|LXXDark" --> + <default> + <key-style + latin:styleName="defaultEnterKeyStyle" + latin:keySpec="!icon/enter_key|!code/key_enter" + latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio|followFunctionalTextColor|keepBackgroundAspectRatio" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="action" + latin:parentStyle="navigateMoreKeysStyle" /> + </default> + </switch> <include latin:keyboardLayout="@xml/key_styles_actions" /> <switch> <!-- Shift + Enter in textMultiLine field. --> diff --git a/java/src/com/android/inputmethod/keyboard/Key.java b/java/src/com/android/inputmethod/keyboard/Key.java index a6f9f3c26..bd1c1479a 100644 --- a/java/src/com/android/inputmethod/keyboard/Key.java +++ b/java/src/com/android/inputmethod/keyboard/Key.java @@ -87,6 +87,7 @@ public class Key implements Comparable<Key> { private static final int LABEL_FLAGS_SHIFTED_LETTER_ACTIVATED = 0x20000; private static final int LABEL_FLAGS_FROM_CUSTOM_ACTION_LABEL = 0x40000; private static final int LABEL_FLAGS_FOLLOW_FUNCTIONAL_TEXT_COLOR = 0x80000; + private static final int LABEL_FLAGS_KEEP_BACKGROUND_ASPECT_RATIO = 0x100000; private static final int LABEL_FLAGS_DISABLE_HINT_LABEL = 0x40000000; private static final int LABEL_FLAGS_DISABLE_ADDITIONAL_MORE_KEYS = 0x80000000; @@ -697,6 +698,10 @@ public class Key implements Comparable<Key> { return (mLabelFlags & LABEL_FLAGS_AUTO_SCALE) == LABEL_FLAGS_AUTO_SCALE; } + public final boolean needsToKeepBackgroundAspectRatio(final int defaultFlags) { + return ((mLabelFlags | defaultFlags) & LABEL_FLAGS_KEEP_BACKGROUND_ASPECT_RATIO) != 0; + } + private final boolean isShiftedLetterActivated() { return (mLabelFlags & LABEL_FLAGS_SHIFTED_LETTER_ACTIVATED) != 0 && !TextUtils.isEmpty(mHintLabel); diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardView.java b/java/src/com/android/inputmethod/keyboard/KeyboardView.java index 075cd901d..bb3cbb0eb 100644 --- a/java/src/com/android/inputmethod/keyboard/KeyboardView.java +++ b/java/src/com/android/inputmethod/keyboard/KeyboardView.java @@ -340,11 +340,25 @@ public class KeyboardView extends View { // Draw key background. protected void onDrawKeyBackground(final Key key, final Canvas canvas, final Drawable background) { - final Rect padding = mKeyBackgroundPadding; - final int bgWidth = key.getDrawWidth() + padding.left + padding.right; - final int bgHeight = key.getHeight() + padding.top + padding.bottom; - final int bgX = -padding.left; - final int bgY = -padding.top; + final int keyWidth = key.getDrawWidth(); + final int keyHeight = key.getHeight(); + final int bgWidth, bgHeight, bgX, bgY; + if (key.needsToKeepBackgroundAspectRatio(mDefaultKeyLabelFlags)) { + final int intrinsicWidth = background.getIntrinsicWidth(); + final int intrinsicHeight = background.getIntrinsicHeight(); + final float minScale = Math.min( + keyWidth / (float)intrinsicWidth, keyHeight / (float)intrinsicHeight); + bgWidth = (int)(intrinsicWidth * minScale); + bgHeight = (int)(intrinsicHeight * minScale); + bgX = (keyWidth - bgWidth) / 2; + bgY = (keyHeight - bgHeight) / 2; + } else { + final Rect padding = mKeyBackgroundPadding; + bgWidth = keyWidth + padding.left + padding.right; + bgHeight = keyHeight + padding.top + padding.bottom; + bgX = -padding.left; + bgY = -padding.top; + } final Rect bounds = background.getBounds(); if (bgWidth != bounds.right || bgHeight != bounds.bottom) { background.setBounds(0, 0, bgWidth, bgHeight); diff --git a/java/src/com/android/inputmethod/keyboard/MoreKeysKeyboard.java b/java/src/com/android/inputmethod/keyboard/MoreKeysKeyboard.java index 73c84cd92..abcfff8a6 100644 --- a/java/src/com/android/inputmethod/keyboard/MoreKeysKeyboard.java +++ b/java/src/com/android/inputmethod/keyboard/MoreKeysKeyboard.java @@ -308,8 +308,8 @@ public final class MoreKeysKeyboard extends Keyboard { dividerWidth = 0; } final MoreKeySpec[] moreKeys = key.getMoreKeys(); - mParams.setParameters(moreKeys.length, key.getMoreKeysColumnNumber(), keyWidth, rowHeight, - key.getX() + key.getWidth() / 2, keyboard.mId.mWidth, + mParams.setParameters(moreKeys.length, key.getMoreKeysColumnNumber(), keyWidth, + rowHeight, key.getX() + key.getWidth() / 2, keyboard.mId.mWidth, key.isMoreKeysFixedColumn(), key.isMoreKeysFixedOrder(), dividerWidth); } diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java index fde94da93..c20546607 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java +++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java @@ -24,6 +24,7 @@ import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; +import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback; import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.personalization.ContextualDictionary; @@ -36,7 +37,6 @@ import com.android.inputmethod.latin.utils.DistracterFilter; import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatchesAndSuggestions; import com.android.inputmethod.latin.utils.DistracterFilterCheckingIsInDictionary; import com.android.inputmethod.latin.utils.ExecutorUtils; -import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.SuggestionResults; import java.io.File; @@ -60,7 +60,6 @@ public class DictionaryFacilitator { // HACK: This threshold is being used when adding a capitalized entry in the User History // dictionary. private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140; - private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3; private DictionaryGroup mDictionaryGroup = new DictionaryGroup(); private boolean mIsUserDictEnabled = false; @@ -68,7 +67,7 @@ public class DictionaryFacilitator { // To synchronize assigning mDictionaryGroup to ensure closing dictionaries. private final Object mLock = new Object(); private final DistracterFilter mDistracterFilter; - private final DictionaryFacilitatorLruCache mFacilitatorCacheForPersonalization; + private final PersonalizationHelperForDictionaryFacilitator mPersonalizationHelper; private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS = new String[] { @@ -176,18 +175,22 @@ public class DictionaryFacilitator { public DictionaryFacilitator() { mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER; - mFacilitatorCacheForPersonalization = null; + mPersonalizationHelper = null; } public DictionaryFacilitator(final Context context) { - mFacilitatorCacheForPersonalization = new DictionaryFacilitatorLruCache(context, - MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */); - mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context, - mFacilitatorCacheForPersonalization); + mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context); + mPersonalizationHelper = + new PersonalizationHelperForDictionaryFacilitator(context, mDistracterFilter); } public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { mDistracterFilter.updateEnabledSubtypes(enabledSubtypes); + mPersonalizationHelper.updateEnabledSubtypes(enabledSubtypes); + } + + public void setIsMonolingualUser(final boolean isMonolingualUser) { + mPersonalizationHelper.setIsMonolingualUser(isMonolingualUser); } public Locale getLocale() { @@ -223,93 +226,125 @@ public class DictionaryFacilitator { usePersonalizedDicts, forceReloadMainDictionary, listener, "" /* dictNamePrefix */); } - public void resetDictionariesWithDictNamePrefix(final Context context, final Locale newLocale, + public void resetDictionariesWithDictNamePrefix(final Context context, + final Locale newLocaleToUse, final boolean useContactsDict, final boolean usePersonalizedDicts, final boolean forceReloadMainDictionary, final DictionaryInitializationListener listener, final String dictNamePrefix) { - final boolean localeHasBeenChanged = !newLocale.equals(mDictionaryGroup.mLocale); - // We always try to have the main dictionary. Other dictionaries can be unused. - final boolean reloadMainDictionary = localeHasBeenChanged || forceReloadMainDictionary; + final HashMap<Locale, ArrayList<String>> existingDictsToCleanup = new HashMap<>(); + // TODO: use several locales + final Locale[] newLocales = new Locale[] { newLocaleToUse }; // TODO: Make subDictTypesToUse configurable by resource or a static final list. final HashSet<String> subDictTypesToUse = new HashSet<>(); + subDictTypesToUse.add(Dictionary.TYPE_USER); if (useContactsDict) { subDictTypesToUse.add(Dictionary.TYPE_CONTACTS); } - subDictTypesToUse.add(Dictionary.TYPE_USER); if (usePersonalizedDicts) { subDictTypesToUse.add(Dictionary.TYPE_USER_HISTORY); subDictTypesToUse.add(Dictionary.TYPE_PERSONALIZATION); subDictTypesToUse.add(Dictionary.TYPE_CONTEXTUAL); } - final Dictionary newMainDict; - if (reloadMainDictionary) { - // The main dictionary will be asynchronously loaded. - newMainDict = null; - } else { - newMainDict = mDictionaryGroup.getDict(Dictionary.TYPE_MAIN); - } - - final Map<String, ExpandableBinaryDictionary> subDicts = new HashMap<>(); - for (final String dictType : SUB_DICT_TYPES) { - if (!subDictTypesToUse.contains(dictType)) { - // This dictionary will not be used. + // Gather all dictionaries. We'll remove them from the list to clean up later. + for (final Locale newLocale : newLocales) { + final ArrayList<String> dictsForLocale = new ArrayList<>(); + existingDictsToCleanup.put(newLocale, dictsForLocale); + final DictionaryGroup currentDictionaryGroupForLocale = + newLocale.equals(mDictionaryGroup.mLocale) ? mDictionaryGroup : null; + if (null == currentDictionaryGroupForLocale) { continue; } - final ExpandableBinaryDictionary dict; - if (!localeHasBeenChanged && mDictionaryGroup.hasDict(dictType)) { - // Continue to use current dictionary. - dict = mDictionaryGroup.getSubDict(dictType); + for (final String dictType : SUB_DICT_TYPES) { + if (currentDictionaryGroupForLocale.hasDict(dictType)) { + dictsForLocale.add(dictType); + } + } + if (currentDictionaryGroupForLocale.hasDict(Dictionary.TYPE_MAIN)) { + dictsForLocale.add(Dictionary.TYPE_MAIN); + } + } + + final HashMap<Locale, DictionaryGroup> newDictionaryGroups = new HashMap<>(); + for (final Locale newLocale : newLocales) { + final DictionaryGroup dictionaryGroupForLocale = + newLocale.equals(mDictionaryGroup.mLocale) ? mDictionaryGroup : null; + final ArrayList<String> dictsToCleanupForLocale = existingDictsToCleanup.get(newLocale); + final boolean noExistingDictsForThisLocale = (null == dictionaryGroupForLocale); + + final Dictionary mainDict; + if (forceReloadMainDictionary || noExistingDictsForThisLocale + || !dictionaryGroupForLocale.hasDict(Dictionary.TYPE_MAIN)) { + mainDict = null; } else { - // Start to use new dictionary. - dict = getSubDict(dictType, context, newLocale, null /* dictFile */, - dictNamePrefix); + mainDict = dictionaryGroupForLocale.getDict(Dictionary.TYPE_MAIN); + dictsToCleanupForLocale.remove(Dictionary.TYPE_MAIN); } - subDicts.put(dictType, dict); + + final Map<String, ExpandableBinaryDictionary> subDicts = new HashMap<>(); + for (final String subDictType : subDictTypesToUse) { + final ExpandableBinaryDictionary subDict; + if (noExistingDictsForThisLocale + || !dictionaryGroupForLocale.hasDict(subDictType)) { + // Create a new dictionary. + subDict = getSubDict(subDictType, context, newLocale, null /* dictFile */, + dictNamePrefix); + } else { + // Reuse the existing dictionary, and don't close it at the end + subDict = dictionaryGroupForLocale.getSubDict(subDictType); + dictsToCleanupForLocale.remove(subDictType); + } + subDicts.put(subDictType, subDict); + } + newDictionaryGroups.put(newLocale, new DictionaryGroup(newLocale, mainDict, subDicts)); } - // Replace DictionaryGroup. - final DictionaryGroup newDictionaryGroup = new DictionaryGroup(newLocale, newMainDict, subDicts); + // Replace Dictionaries. + // TODO: use multiple locales. + final DictionaryGroup newDictionaryGroup = newDictionaryGroups.get(newLocaleToUse); final DictionaryGroup oldDictionaryGroup; synchronized (mLock) { oldDictionaryGroup = mDictionaryGroup; mDictionaryGroup = newDictionaryGroup; mIsUserDictEnabled = UserBinaryDictionary.isEnabled(context); - if (reloadMainDictionary) { - asyncReloadMainDictionary(context, newLocale, listener); + if (null == newDictionaryGroup.getDict(Dictionary.TYPE_MAIN)) { + asyncReloadUninitializedMainDictionaries(context, newLocales, listener); } } if (listener != null) { listener.onUpdateMainDictionaryAvailability(hasInitializedMainDictionary()); } + // Clean up old dictionaries. - if (reloadMainDictionary) { - oldDictionaryGroup.closeDict(Dictionary.TYPE_MAIN); - } - for (final String dictType : SUB_DICT_TYPES) { - if (localeHasBeenChanged || !subDictTypesToUse.contains(dictType)) { - oldDictionaryGroup.closeDict(dictType); + for (final Locale localeToCleanUp : existingDictsToCleanup.keySet()) { + final ArrayList<String> dictTypesToCleanUp = + existingDictsToCleanup.get(localeToCleanUp); + final DictionaryGroup dictionarySetToCleanup = oldDictionaryGroup; + for (final String dictType : dictTypesToCleanUp) { + dictionarySetToCleanup.closeDict(dictType); } } - oldDictionaryGroup.mSubDictMap.clear(); } - private void asyncReloadMainDictionary(final Context context, final Locale locale, - final DictionaryInitializationListener listener) { + private void asyncReloadUninitializedMainDictionaries(final Context context, + final Locale[] locales, final DictionaryInitializationListener listener) { final CountDownLatch latchForWaitingLoadingMainDictionary = new CountDownLatch(1); mLatchForWaitingLoadingMainDictionary = latchForWaitingLoadingMainDictionary; ExecutorUtils.getExecutor("InitializeBinaryDictionary").execute(new Runnable() { @Override public void run() { - final Dictionary mainDict = - DictionaryFactory.createMainDictionaryFromManager(context, locale); - synchronized (mLock) { - if (locale.equals(mDictionaryGroup.mLocale)) { - mDictionaryGroup.setMainDict(mainDict); - } else { - // Dictionary facilitator has been reset for another locale. - mainDict.close(); + for (final Locale locale : locales) { + final DictionaryGroup dictionaryGroup = mDictionaryGroup; + final Dictionary mainDict = + DictionaryFactory.createMainDictionaryFromManager(context, locale); + synchronized (mLock) { + if (locale.equals(dictionaryGroup.mLocale)) { + dictionaryGroup.setMainDict(mainDict); + } else { + // Dictionary facilitator has been reset for another locale. + mainDict.close(); + } } } if (listener != null) { @@ -358,10 +393,10 @@ public class DictionaryFacilitator { for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) { dictionaryGroup.closeDict(dictType); } - if (mFacilitatorCacheForPersonalization != null) { - mFacilitatorCacheForPersonalization.evictAll(); - } mDistracterFilter.close(); + if (mPersonalizationHelper != null) { + mPersonalizationHelper.close(); + } } @UsedForTesting @@ -381,11 +416,11 @@ public class DictionaryFacilitator { } public void flushPersonalizationDictionary() { - final ExpandableBinaryDictionary personalizationDict = + final ExpandableBinaryDictionary personalizationDictUsedForSuggestion = mDictionaryGroup.getSubDict(Dictionary.TYPE_PERSONALIZATION); - if (personalizationDict != null) { - personalizationDict.asyncFlushBinaryDictionary(); - } + mPersonalizationHelper.flushPersonalizationDictionariesToUpdate( + personalizationDictUsedForSuggestion); + mDistracterFilter.close(); } public void waitForLoadingMainDictionary(final long timeout, final TimeUnit unit) @@ -589,6 +624,7 @@ public class DictionaryFacilitator { // personalization dictionary. public void clearPersonalizationDictionary() { clearSubDictionary(Dictionary.TYPE_PERSONALIZATION); + mPersonalizationHelper.clearDictionariesToUpdate(); } public void clearContextualDictionary() { @@ -598,33 +634,9 @@ public class DictionaryFacilitator { public void addEntriesToPersonalizationDictionary( final PersonalizationDataChunk personalizationDataChunk, final SpacingAndPunctuations spacingAndPunctuations, - final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) { - final ExpandableBinaryDictionary personalizationDict = - mDictionaryGroup.getSubDict(Dictionary.TYPE_PERSONALIZATION); - if (personalizationDict == null) { - if (callback != null) { - callback.onFinished(); - } - return; - } - // TODO: Get locale from personalizationDataChunk.mDetectedLanguage. - final Locale dataChunkLocale = getLocale(); - final DictionaryFacilitator dictionaryFacilitatorForLocale = - mFacilitatorCacheForPersonalization.get(dataChunkLocale); - final ArrayList<LanguageModelParam> languageModelParams = - LanguageModelParam.createLanguageModelParamsFrom( - personalizationDataChunk.mTokens, - personalizationDataChunk.mTimestampInSeconds, - dictionaryFacilitatorForLocale, spacingAndPunctuations, - new DistracterFilterCheckingIsInDictionary( - mDistracterFilter, personalizationDict)); - if (languageModelParams == null || languageModelParams.isEmpty()) { - if (callback != null) { - callback.onFinished(); - } - return; - } - personalizationDict.addMultipleDictionaryEntriesDynamically(languageModelParams, callback); + final AddMultipleDictionaryEntriesCallback callback) { + mPersonalizationHelper.addEntriesToPersonalizationDictionariesToUpdate( + getLocale(), personalizationDataChunk, spacingAndPunctuations, callback); } public void addPhraseToContextualDictionary(final String[] phrase, final int probability, diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 67e2ca5c7..a6243430b 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -614,9 +614,10 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen private void refreshPersonalizationDictionarySession( final SettingsValues currentSettingsValues) { - mPersonalizationDictionaryUpdater.onLoadSettings( - currentSettingsValues.mUsePersonalizedDicts, + mDictionaryFacilitator.setIsMonolingualUser( mSubtypeSwitcher.isSystemLocaleSameAsLocaleOfAllEnabledSubtypesOfEnabledImes()); + mPersonalizationDictionaryUpdater.onLoadSettings( + currentSettingsValues.mUsePersonalizedDicts); mContextualDictionaryUpdater.onLoadSettings(currentSettingsValues.mUsePersonalizedDicts); final boolean shouldKeepUserHistoryDictionaries; if (currentSettingsValues.mUsePersonalizedDicts) { @@ -734,15 +735,12 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen cleanupInternalStateForFinishInput(); } } - // TODO: Remove this test. - if (!conf.locale.equals(mPersonalizationDictionaryUpdater.getLocale())) { - refreshPersonalizationDictionarySession(settingsValues); - } super.onConfigurationChanged(conf); } @Override public View onCreateInputView() { + StatsUtils.onCreateInputView(); return mKeyboardSwitcher.onCreateInputView(mIsHardwareAcceleratedDrawingEnabled); } @@ -775,6 +773,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen @Override public void onFinishInputView(final boolean finishingInput) { + StatsUtils.onFinishInputView(); mHandler.onFinishInputView(finishingInput); } @@ -852,6 +851,11 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen final boolean inputTypeChanged = !currentSettingsValues.isSameInputType(editorInfo); final boolean isDifferentTextField = !restarting || inputTypeChanged; + + StatsUtils.onStartInputView(editorInfo.inputType, + Settings.getInstance().getCurrent().mDisplayOrientation, + !isDifferentTextField); + if (isDifferentTextField) { mSubtypeSwitcher.updateParametersOnStartInputView(); } diff --git a/java/src/com/android/inputmethod/latin/PersonalizationHelperForDictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/PersonalizationHelperForDictionaryFacilitator.java new file mode 100644 index 000000000..43cebdfa4 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/PersonalizationHelperForDictionaryFacilitator.java @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.concurrent.atomic.AtomicInteger; + +import android.content.Context; +import android.view.inputmethod.InputMethodSubtype; + +import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback; +import com.android.inputmethod.latin.personalization.PersonalizationDataChunk; +import com.android.inputmethod.latin.personalization.PersonalizationDictionary; +import com.android.inputmethod.latin.settings.SpacingAndPunctuations; +import com.android.inputmethod.latin.utils.DistracterFilter; +import com.android.inputmethod.latin.utils.DistracterFilterCheckingIsInDictionary; +import com.android.inputmethod.latin.utils.LanguageModelParam; +import com.android.inputmethod.latin.utils.SubtypeLocaleUtils; + +/** + * Class for managing and updating personalization dictionaries. + */ +public class PersonalizationHelperForDictionaryFacilitator { + private final Context mContext; + private final DistracterFilter mDistracterFilter; + private final HashMap<String, HashSet<Locale>> mLangToLocalesMap = new HashMap<>(); + private final HashMap<Locale, ExpandableBinaryDictionary> mPersonalizationDictsToUpdate = + new HashMap<>(); + private boolean mIsMonolingualUser = false; + + PersonalizationHelperForDictionaryFacilitator(final Context context, + final DistracterFilter distracterFilter) { + mContext = context; + mDistracterFilter = distracterFilter; + } + + public void close() { + mLangToLocalesMap.clear(); + for (final ExpandableBinaryDictionary dict : mPersonalizationDictsToUpdate.values()) { + dict.close(); + } + mPersonalizationDictsToUpdate.clear(); + } + + public void clearDictionariesToUpdate() { + for (final ExpandableBinaryDictionary dict : mPersonalizationDictsToUpdate.values()) { + dict.clear(); + } + mPersonalizationDictsToUpdate.clear(); + } + + public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { + for (final InputMethodSubtype subtype : enabledSubtypes) { + final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype); + final String language = locale.getLanguage(); + final HashSet<Locale> locales = mLangToLocalesMap.get(language); + if (locales != null) { + locales.add(locale); + } else { + final HashSet<Locale> localeSet = new HashSet<>(); + localeSet.add(locale); + mLangToLocalesMap.put(language, localeSet); + } + } + } + + public void setIsMonolingualUser(final boolean isMonolingualUser) { + mIsMonolingualUser = isMonolingualUser; + } + + /** + * Flush personalization dictionaries to dictionary files. Close dictionaries after writing + * files except the dictionary that is used for generating suggestions. + * + * @param personalizationDictUsedForSuggestion the personalization dictionary used for + * generating suggestions that won't be closed. + */ + public void flushPersonalizationDictionariesToUpdate( + final ExpandableBinaryDictionary personalizationDictUsedForSuggestion) { + for (final ExpandableBinaryDictionary personalizationDict : + mPersonalizationDictsToUpdate.values()) { + personalizationDict.asyncFlushBinaryDictionary(); + if (personalizationDict != personalizationDictUsedForSuggestion) { + // Close if the dictionary is not being used for suggestion. + personalizationDict.close(); + } + } + mDistracterFilter.close(); + mPersonalizationDictsToUpdate.clear(); + } + + private ExpandableBinaryDictionary getPersonalizationDictToUpdate(final Context context, + final Locale locale) { + ExpandableBinaryDictionary personalizationDict = mPersonalizationDictsToUpdate.get(locale); + if (personalizationDict != null) { + return personalizationDict; + } + personalizationDict = PersonalizationDictionary.getDictionary(context, locale, + null /* dictFile */, "" /* dictNamePrefix */); + mPersonalizationDictsToUpdate.put(locale, personalizationDict); + return personalizationDict; + } + + private void addEntriesToPersonalizationDictionariesForLocale(final Locale locale, + final PersonalizationDataChunk personalizationDataChunk, + final SpacingAndPunctuations spacingAndPunctuations, + final AddMultipleDictionaryEntriesCallback callback) { + final ExpandableBinaryDictionary personalizationDict = + getPersonalizationDictToUpdate(mContext, locale); + if (personalizationDict == null) { + if (callback != null) { + callback.onFinished(); + } + return; + } + final ArrayList<LanguageModelParam> languageModelParams = + LanguageModelParam.createLanguageModelParamsFrom( + personalizationDataChunk.mTokens, + personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations, + locale, new DistracterFilterCheckingIsInDictionary( + mDistracterFilter, personalizationDict)); + if (languageModelParams == null || languageModelParams.isEmpty()) { + if (callback != null) { + callback.onFinished(); + } + return; + } + personalizationDict.addMultipleDictionaryEntriesDynamically(languageModelParams, callback); + } + + public void addEntriesToPersonalizationDictionariesToUpdate(final Locale defaultLocale, + final PersonalizationDataChunk personalizationDataChunk, + final SpacingAndPunctuations spacingAndPunctuations, + final AddMultipleDictionaryEntriesCallback callback) { + final String language = personalizationDataChunk.mDetectedLanguage; + final HashSet<Locale> locales; + if (mIsMonolingualUser && PersonalizationDataChunk.LANGUAGE_UNKNOWN.equals(language) + && mLangToLocalesMap.size() == 1) { + locales = mLangToLocalesMap.get(defaultLocale.getLanguage()); + } else { + locales = mLangToLocalesMap.get(language); + } + if (locales == null || locales.isEmpty()) { + if (callback != null) { + callback.onFinished(); + } + return; + } + final AtomicInteger remainingTaskCount = new AtomicInteger(locales.size()); + final AddMultipleDictionaryEntriesCallback callbackForLocales = + new AddMultipleDictionaryEntriesCallback() { + @Override + public void onFinished() { + if (remainingTaskCount.decrementAndGet() == 0) { + // Update tasks for all locales have been finished. + if (callback != null) { + callback.onFinished(); + } + } + } + }; + for (final Locale locale : locales) { + addEntriesToPersonalizationDictionariesForLocale(locale, personalizationDataChunk, + spacingAndPunctuations, callbackForLocales); + } + } +} diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java index 26acabdaf..c5e60d677 100644 --- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java +++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java @@ -1088,8 +1088,10 @@ public final class InputLogic { if (!TextUtils.isEmpty(rejectedSuggestion)) { mDictionaryFacilitator.removeWordFromPersonalizedDicts(rejectedSuggestion); } + StatsUtils.onBackspaceWordDelete(rejectedSuggestion.length()); } else { mWordComposer.applyProcessedEvent(event); + StatsUtils.onBackspacePressed(1); } if (mWordComposer.isComposingWord()) { setComposingTextInternal(getTextWithUnderline(mWordComposer.getTypedWord()), 1); @@ -1100,6 +1102,7 @@ public final class InputLogic { } else { if (mLastComposedWord.canRevertCommit()) { revertCommit(inputTransaction); + StatsUtils.onRevertAutoCorrect(); return; } if (mEnteredText != null && mConnection.sameAsTextBeforeCursor(mEnteredText)) { @@ -1107,6 +1110,7 @@ public final class InputLogic { // This is triggered on backspace after a key that inputs multiple characters, // like the smiley key or the .com key. mConnection.deleteSurroundingText(mEnteredText.length(), 0); + StatsUtils.onDeleteMultiCharInput(mEnteredText.length()); mEnteredText = null; // If we have mEnteredText, then we know that mHasUncommittedTypedChars == false. // In addition we know that spaceState is false, and that we should not be @@ -1122,10 +1126,12 @@ public final class InputLogic { inputTransaction.setRequiresUpdateSuggestions(); mWordComposer.setCapitalizedModeAtStartComposingTime( WordComposer.CAPS_MODE_OFF); + StatsUtils.onRevertDoubleSpacePeriod(); return; } } else if (SpaceState.SWAP_PUNCTUATION == inputTransaction.mSpaceState) { if (mConnection.revertSwapPunctuation()) { + StatsUtils.onRevertSwapPunctuation(); // Likewise return; } @@ -1140,6 +1146,7 @@ public final class InputLogic { mConnection.setSelection(mConnection.getExpectedSelectionEnd(), mConnection.getExpectedSelectionEnd()); mConnection.deleteSurroundingText(numCharsDeleted, 0); + StatsUtils.onBackspaceSelectedText(numCharsDeleted); } else { // There is no selection, just delete one character. if (Constants.NOT_A_CURSOR_POSITION == mConnection.getExpectedSelectionEnd()) { @@ -1156,9 +1163,12 @@ public final class InputLogic { // applications are relying on this behavior so we continue to support it for // older apps, so we retain this behavior if the app has target SDK < JellyBean. sendDownUpKeyEvent(KeyEvent.KEYCODE_DEL); + int totalDeletedLength = 1; if (mDeleteCount > Constants.DELETE_ACCELERATE_AT) { sendDownUpKeyEvent(KeyEvent.KEYCODE_DEL); + totalDeletedLength++; } + StatsUtils.onBackspacePressed(totalDeletedLength); } else { final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (codePointBeforeCursor == Constants.NOT_A_CODE) { @@ -1169,11 +1179,13 @@ public final class InputLogic { // catch it and have their broken interface react. If you need the keyboard // to do this, you're doing it wrong -- please fix your app. mConnection.deleteSurroundingText(1, 0); + // TODO: Add a new StatsUtils method onBackspaceWhenNoText() return; } final int lengthToDelete = Character.isSupplementaryCodePoint(codePointBeforeCursor) ? 2 : 1; mConnection.deleteSurroundingText(lengthToDelete, 0); + int totalDeletedLength = lengthToDelete; if (mDeleteCount > Constants.DELETE_ACCELERATE_AT) { final int codePointBeforeCursorToDeleteAgain = mConnection.getCodePointBeforeCursor(); @@ -1181,8 +1193,10 @@ public final class InputLogic { final int lengthToDeleteAgain = Character.isSupplementaryCodePoint( codePointBeforeCursorToDeleteAgain) ? 2 : 1; mConnection.deleteSurroundingText(lengthToDeleteAgain, 0); + totalDeletedLength += lengthToDeleteAgain; } } + StatsUtils.onBackspacePressed(totalDeletedLength); } } if (inputTransaction.mSettingsValues diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java index 6f4b09741..734ed5583 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDataChunk.java @@ -20,6 +20,8 @@ import java.util.Collections; import java.util.List; public class PersonalizationDataChunk { + public static final String LANGUAGE_UNKNOWN = ""; + public final boolean mInputByUser; public final List<String> mTokens; public final int mTimestampInSeconds; diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index 787e4a59d..94c62429e 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -36,10 +36,38 @@ public interface DistracterFilter { public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, final String testedWord, final Locale locale); + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale); + public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes); public void close(); + public static final class HandlingType { + private final static int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0; + private final static int SHOULD_BE_LOWER_CASED = 0x1; + private final static int SHOULD_BE_HANDLED_AS_OOV = 0x2; + + public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) { + int wordHandlingType = HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; + if (shouldBeLowerCased) { + wordHandlingType |= HandlingType.SHOULD_BE_LOWER_CASED; + } + if (isOov) { + wordHandlingType |= HandlingType.SHOULD_BE_HANDLED_AS_OOV; + } + return wordHandlingType; + } + + public static boolean shouldBeLowerCased(final int handlingType) { + return (handlingType & SHOULD_BE_LOWER_CASED) != 0; + } + + public static boolean shouldBeHandledAsOov(final int handlingType) { + return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0; + } + }; + public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() { @Override public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo, @@ -48,6 +76,12 @@ public interface DistracterFilter { } @Override + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, + final String testedWord, final Locale locale) { + return HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; + } + + @Override public void close() { } diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java index e10571e4a..1db525502 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java @@ -51,6 +51,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName(); private static final boolean DEBUG = false; + private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3; private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024; private final Context mContext; @@ -73,15 +74,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr * Create a DistracterFilter instance. * * @param context the context. - * @param dictionaryFacilitatorLruCache the cache of dictionaryFacilitators that are used for - * checking distracters. */ - public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context, - final DictionaryFacilitatorLruCache dictionaryFacilitatorLruCache) { + public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) { mContext = context; mLocaleToSubtypeCache = new ConcurrentHashMap<>(); mLocaleToKeyboardCache = new ConcurrentHashMap<>(); - mDictionaryFacilitatorLruCache = dictionaryFacilitatorLruCache; + mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context, + MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */); mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE); } @@ -89,7 +88,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr public void close() { mLocaleToSubtypeCache.clear(); mLocaleToKeyboardCache.clear(); - mDistractersCache.evictAll(); + mDictionaryFacilitatorLruCache.evictAll(); + // Don't clear mDistractersCache. } @Override @@ -194,9 +194,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr mDistractersCache.put(cacheKey, Boolean.TRUE); return true; } - final boolean isValidWord = dictionaryFacilitator.isValidWord(testedWord, - false /* ignoreCase */); - if (isValidWord) { + final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */); + if (Word) { // Valid word is not a distractor. if (DEBUG) { Log.d(TAG, "isDistracter: false (valid word)"); @@ -283,4 +282,41 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr } return false; } + + private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale) { + final DictionaryFacilitator dictionaryFacilitator = + mDictionaryFacilitatorLruCache.get(locale); + if (dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */)) { + return false; + } + final String lowerCaseTargetWord = testedWord.toLowerCase(locale); + if (testedWord.equals(lowerCaseTargetWord)) { + return false; + } + if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { + return true; + } + if (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST + && !prevWordsInfo.isValid()) { + // TODO: Check beginning-of-sentence. + return true; + } + return false; + } + + @Override + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale) { + // TODO: Use this method for user history dictionary. + if (testedWord == null|| locale == null) { + return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */); + } + final boolean shouldBeLowerCased = shouldBeLowerCased(prevWordsInfo, testedWord, locale); + final String caseModifiedWord = + shouldBeLowerCased ? testedWord.toLowerCase(locale) : testedWord; + final boolean isOov = !mDictionaryFacilitatorLruCache.get(locale).isValidWord( + caseModifiedWord, false /* ignoreCase */); + return HandlingType.getHandlingType(shouldBeLowerCased, isOov); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java index 4ad4ba784..349236f18 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java @@ -48,6 +48,12 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter } @Override + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale) { + return mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale); + } + + @Override public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) { // Do nothing. } diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index fbce3f2fd..05d124764 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; +import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; import java.util.ArrayList; import java.util.List; @@ -81,8 +82,7 @@ public final class LanguageModelParam { // Process a list of words and return a list of {@link LanguageModelParam} objects. public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( final List<String> tokens, final int timestamp, - final DictionaryFacilitator dictionaryFacilitator, - final SpacingAndPunctuations spacingAndPunctuations, + final SpacingAndPunctuations spacingAndPunctuations, final Locale locale, final DistracterFilter distracterFilter) { final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>(); final int N = tokens.size(); @@ -111,8 +111,7 @@ public final class LanguageModelParam { } final LanguageModelParam languageModelParam = detectWhetherVaildWordOrNotAndGetLanguageModelParam( - prevWordsInfo, tempWord, timestamp, dictionaryFacilitator, - distracterFilter); + prevWordsInfo, tempWord, timestamp, locale, distracterFilter); if (languageModelParam == null) { continue; } @@ -125,47 +124,25 @@ public final class LanguageModelParam { private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, - final DictionaryFacilitator dictionaryFacilitator, - final DistracterFilter distracterFilter) { - final Locale locale = dictionaryFacilitator.getLocale(); + final Locale locale, final DistracterFilter distracterFilter) { if (locale == null) { return null; } - if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { - return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp, - true /* isValidWord */, locale, distracterFilter); - } - - final String lowerCaseTargetWord = targetWord.toLowerCase(locale); - if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { - // Add the lower-cased word. - return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord, - timestamp, true /* isValidWord */, locale, distracterFilter); + final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo, + targetWord, locale); + final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? + targetWord.toLowerCase(locale) : targetWord; + if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) { + // The word is a distracter. + return null; } - - // Treat the word as an OOV word. - return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp, - false /* isValidWord */, locale, distracterFilter); + return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp, + !HandlingType.shouldBeHandledAsOov(wordHandlingType)); } private static LanguageModelParam createAndGetLanguageModelParamOfWord( - final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, - final boolean isValidWord, final Locale locale, - final DistracterFilter distracterFilter) { - final String word; - if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST - && !prevWordsInfo.isValid() && !isValidWord) { - word = targetWord.toLowerCase(locale); - } else { - word = targetWord; - } - // Check whether the word is a distracter to words in the dictionaries. - if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) { - if (DEBUG) { - Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word."); - } - return null; - } + final PrevWordsInfo prevWordsInfo, final String word, final int timestamp, + final boolean isValidWord) { final int unigramProbability = isValidWord ? UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD; if (!prevWordsInfo.isValid()) { diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 68d2bbd83..b896f386f 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -131,6 +131,7 @@ LATIN_IME_CORE_TEST_FILES := \ suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \ suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \ suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp \ + suggest/policyimpl/dictionary/utils/format_utils_test.cpp \ suggest/policyimpl/dictionary/utils/sparse_table_test.cpp \ suggest/policyimpl/dictionary/utils/trie_map_test.cpp \ suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy_test.cpp \ diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 2230dc7b8..3970963e8 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -26,6 +26,7 @@ #include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/layout/proximity_info_state.h" #include "utils/char_utils.h" +#include "utils/int_array_view.h" #if DEBUG_DICT #define LOGI_SHOW_ADD_COST_PROP \ @@ -136,18 +137,15 @@ class DicNode { } void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, - const int probability, const int wordId, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + const int wordId, const CodePointArrayView mergedCodePoints) { uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( - dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0], - probability, wordId, hasChildren, isBlacklistedOrNotAWord, newDepth, - newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordIds()); - mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, - mergedNodeCodePoints); + dicNode->mDicNodeProperties.getLeavingDepth() + mergedCodePoints.size()); + mDicNodeProperties.init(childrenPtNodeArrayPos, mergedCodePoints[0], + wordId, newDepth, newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordIds()); + mDicNodeState.init(&dicNode->mDicNodeState, mergedCodePoints.size(), + mergedCodePoints.data()); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } @@ -179,9 +177,6 @@ class DicNode { // Check if the current word and the previous word can be considered as a valid multiple word // suggestion. bool isValidMultipleWordSuggestion() const { - if (isBlacklistedOrNotAWord()) { - return false; - } // Treat suggestion as invalid if the current and the previous word are single character // words. const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() @@ -218,10 +213,6 @@ class DicNode { return mDicNodeProperties.getChildrenPtNodeArrayPos(); } - int getProbability() const { - return mDicNodeProperties.getProbability(); - } - AK_FORCE_INLINE bool isTerminalDicNode() const { const bool isTerminalPtNode = mDicNodeProperties.isTerminal(); const int currentDicNodeDepth = getNodeCodePointCount(); @@ -404,10 +395,6 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes(); } - bool isBlacklistedOrNotAWord() const { - return mDicNodeProperties.isBlacklistedOrNotAWord(); - } - inline uint16_t getNodeCodePointCount() const { return mDicNodeProperties.getDepth(); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 87d245276..fe5fe8448 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -18,7 +18,6 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { @@ -73,25 +72,16 @@ namespace latinime { if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, - multiBigramMap); + const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( + dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); + if (dicNode->hasMultipleWords() + && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); + } // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. - const float cost = static_cast<float>(MAX_PROBABILITY - probability) + const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) / static_cast<float>(MAX_PROBABILITY); return cost; } -/* static */ int DicNodeUtils::getBigramNodeProbability( - const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { - const int unigramProbability = dicNode->getProbability(); - if (multiBigramMap) { - const int *const prevWordIds = dicNode->getPrevWordIds(); - return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, - prevWordIds, dicNode->getWordId(), unigramProbability); - } - return dictionaryStructurePolicy->getProbability(unigramProbability, - NOT_A_PROBABILITY); -} - } // namespace latinime diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 56ff6e3d0..961a1c29d 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -46,10 +46,6 @@ class DicNodeUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); // Max number of bigrams to look up static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500; - - static int getBigramNodeProbability( - const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index b6a195103..e6b758954 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" +#include "utils/int_array_view.h" namespace latinime { @@ -59,14 +60,10 @@ class DicNodeVector { } void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, - const int probability, const int wordId, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + const int wordId, const CodePointArrayView mergedCodePoints) { ASSERT(!mLock); mDicNodes.emplace_back(); - mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, probability, - wordId, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, - mergedNodeCodePoints); + mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, wordId, mergedCodePoints); } DicNode *operator[](const int id) { diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index be3134c91..6a1b84273 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -29,23 +29,17 @@ namespace latinime { class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), - mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), - mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), mDepth(0), - mLeavingDepth(0) {} + : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mDicNodeCodePoint(NOT_A_CODE_POINT), + mWordId(NOT_A_WORD_ID), mDepth(0), mLeavingDepth(0) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. - void init(const int childrenPos, const int nodeCodePoint, const int probability, - const int wordId, const bool hasChildren, const bool isBlacklistedOrNotAWord, + void init(const int childrenPos, const int nodeCodePoint, const int wordId, const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) { mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; - mProbability = probability; mWordId = wordId; - mHasChildrenPtNodes = hasChildren; - mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); @@ -55,10 +49,7 @@ class DicNodeProperties { void init(const int rootPtNodeArrayPos, const int *const prevWordIds) { mChildrenPtNodeArrayPos = rootPtNodeArrayPos; mDicNodeCodePoint = NOT_A_CODE_POINT; - mProbability = NOT_A_PROBABILITY; mWordId = NOT_A_WORD_ID; - mHasChildrenPtNodes = true; - mIsBlacklistedOrNotAWord = false; mDepth = 0; mLeavingDepth = 0; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); @@ -67,10 +58,7 @@ class DicNodeProperties { void initByCopy(const DicNodeProperties *const dicNodeProp) { mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; - mProbability = dicNodeProp->mProbability; mWordId = dicNodeProp->mWordId; - mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; - mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); @@ -80,10 +68,7 @@ class DicNodeProperties { void init(const DicNodeProperties *const dicNodeProp, const int codePoint) { mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mProbability = dicNodeProp->mProbability; mWordId = dicNodeProp->mWordId; - mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; - mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); @@ -93,10 +78,6 @@ class DicNodeProperties { return mChildrenPtNodeArrayPos; } - int getProbability() const { - return mProbability; - } - int getDicNodeCodePoint() const { return mDicNodeCodePoint; } @@ -115,11 +96,7 @@ class DicNodeProperties { } bool hasChildren() const { - return mHasChildrenPtNodes || mDepth != mLeavingDepth; - } - - bool isBlacklistedOrNotAWord() const { - return mIsBlacklistedOrNotAWord; + return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth; } const int *getPrevWordIds() const { @@ -135,11 +112,8 @@ class DicNodeProperties { // Use a default copy constructor and an assign operator because shallow copies are ok // for this class int mChildrenPtNodeArrayPos; - int mProbability; int mDicNodeCodePoint; int mWordId; - bool mHasChildrenPtNodes; - bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 8f9b2aa12..1de405104 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -61,10 +61,11 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } Dictionary::NgramListenerForPrediction::NgramListenerForPrediction( - const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults, + const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds, + SuggestionResults *const suggestionResults, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) - : mPrevWordsInfo(prevWordsInfo), mSuggestionResults(suggestionResults), - mDictStructurePolicy(dictStructurePolicy) {} + : mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds), + mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {} void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability, const int targetWordId) { @@ -83,19 +84,21 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi if (codePointCount <= 0) { return; } - const int probability = mDictStructurePolicy->getProbability( - unigramProbability, ngramProbability); - mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability); + const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext( + mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); + mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, + wordAttributes.getProbability()); } void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults, - mDictionaryStructureWithBufferPolicy.get()); int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds, true /* tryLowerCaseSearch */); + NgramListenerForPrediction listener(prevWordsInfo, + WordIdArrayView::fromFixedSizeArray(prevWordIds), outSuggestionResults, + mDictionaryStructureWithBufferPolicy.get()); mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener); } diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 50951fbc1..0b54f30e9 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -26,6 +26,7 @@ #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/suggest_interface.h" +#include "utils/int_array_view.h" namespace latinime { @@ -118,7 +119,7 @@ class Dictionary { class NgramListenerForPrediction : public NgramListener { public: NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo, - SuggestionResults *const suggestionResults, + const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy); virtual void onVisitEntry(const int ngramProbability, const int targetWordId); @@ -126,6 +127,7 @@ class Dictionary { DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction); const PrevWordsInfo *const mPrevWordsInfo; + const WordIdArrayView mPrevWordIds; SuggestionResults *const mSuggestionResults; const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy; }; diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp index b372b6b4f..f71d4c5f0 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp @@ -59,8 +59,11 @@ namespace latinime { if (!dicNode.isTerminalDicNode()) { continue; } + const WordAttributes wordAttributes = + dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(), + dicNode.getWordId(), nullptr /* multiBigramMap */); // dicNode can contain case errors, accent errors, intentional omissions or digraphs. - maxProbability = std::max(maxProbability, dicNode.getProbability()); + maxProbability = std::max(maxProbability, wordAttributes.getProbability()); } return maxProbability; } diff --git a/native/jni/src/suggest/core/dictionary/word_attributes.h b/native/jni/src/suggest/core/dictionary/word_attributes.h new file mode 100644 index 000000000..6e9da3570 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/word_attributes.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_WORD_ATTRIBUTES_H +#define LATINIME_WORD_ATTRIBUTES_H + +#include "defines.h" + +class WordAttributes { + public: + // Invalid word attributes. + WordAttributes() + : mProbability(NOT_A_PROBABILITY), mIsBlacklisted(false), mIsNotAWord(false), + mIsPossiblyOffensive(false) {} + + WordAttributes(const int probability, const bool isBlacklisted, const bool isNotAWord, + const bool isPossiblyOffensive) + : mProbability(probability), mIsBlacklisted(isBlacklisted), mIsNotAWord(isNotAWord), + mIsPossiblyOffensive(isPossiblyOffensive) {} + + int getProbability() const { + return mProbability; + } + + bool isBlacklisted() const { + return mIsBlacklisted; + } + + bool isNotAWord() const { + return mIsNotAWord; + } + + bool isPossiblyOffensive() const { + return mIsPossiblyOffensive; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(WordAttributes); + + int mProbability; + bool mIsBlacklisted; + bool mIsNotAWord; + bool mIsPossiblyOffensive; +}; + + // namespace +#endif /* LATINIME_WORD_ATTRIBUTES_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index aeeb66f93..7414f696c 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/property/word_property.h" +#include "suggest/core/dictionary/word_attributes.h" #include "utils/int_array_view.h" namespace latinime { @@ -29,6 +30,7 @@ namespace latinime { class DicNode; class DicNodeVector; class DictionaryHeaderStructurePolicy; +class MultiBigramMap; class NgramListener; class PrevWordsInfo; class UnigramProperty; @@ -56,6 +58,10 @@ class DictionaryStructureWithBufferPolicy { virtual int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const = 0; + virtual const WordAttributes getWordAttributesInContext(const int *const prevWordIds, + const int wordId, MultiBigramMap *const multiBigramMap) const = 0; + + // TODO: Remove virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0; virtual int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const = 0; diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h index 8ddaa0514..6dfa7e314 100644 --- a/native/jni/src/suggest/core/policy/traversal.h +++ b/native/jni/src/suggest/core/policy/traversal.h @@ -48,7 +48,8 @@ class Traversal { virtual int getTerminalCacheSize() const = 0; virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0; - virtual bool isGoodToTraverseNextWord(const DicNode *const dicNode) const = 0; + virtual bool isGoodToTraverseNextWord(const DicNode *const dicNode, + const int probability) const = 0; protected: Traversal() {} diff --git a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp index ad860c4a4..6e0193772 100644 --- a/native/jni/src/suggest/core/result/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/result/suggestions_output_utils.cpp @@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode); const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost; - const bool isPossiblyOffensiveWord = - traverseSession->getDictionaryStructurePolicy()->getProbability( - terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0; + const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy() + ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(), + terminalDicNode->getWordId(), nullptr /* multiBigramMap */); const bool isExactMatch = ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); const bool isExactMatchWithIntentionalOmission = @@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; // Heuristic: We exclude probability=0 first-char-uppercase words from exact match. // (e.g. "AMD" and "and") const bool isSafeExactMatch = isExactMatch - && !(isPossiblyOffensiveWord && isFirstCharUppercase); + && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase); const int outputTypeFlags = - (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) + (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) | (isExactMatchWithIntentionalOmission ? Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); // Entries that are blacklisted or do not represent a word should not be output. - const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); + const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()); // When we have to block offensive words, non-exact matched offensive words should not be // output. const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); - const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord + const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive() && !isSafeExactMatch; // Increase output score of top typing suggestion to ensure autocorrection. diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 66c87f04c..947d41f4b 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -21,6 +21,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" +#include "suggest/core/dictionary/word_attributes.h" #include "suggest/core/layout/proximity_info.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/traversal.h" @@ -412,7 +413,11 @@ void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicN */ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, const bool spaceSubstitution) const { - if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) { + const WordAttributes wordAttributes = + traverseSession->getDictionaryStructurePolicy()->getWordAttributesInContext( + dicNode->getPrevWordIds(), dicNode->getWordId(), + traverseSession->getMultiBigramMap()); + if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode, wordAttributes.getProbability())) { return; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index f9013310c..9b8a50b07 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -28,6 +28,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" @@ -78,10 +79,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d } const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), - ptNodeParams.getProbability(), wordId, ptNodeParams.hasChildren(), - ptNodeParams.isBlacklisted() - || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, - ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + wordId, ptNodeParams.getCodePointArrayView()); } if (readingHelper.isError()) { mIsCorrupted = true; @@ -117,6 +115,35 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, return getWordIdFromTerminalPtNodePos(ptNodePos); } +const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( + const int *const prevWordIds, const int wordId, + MultiBigramMap *const multiBigramMap) const { + if (wordId == NOT_A_WORD_ID) { + return WordAttributes(); + } + const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); + const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); + if (multiBigramMap) { + const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */, + prevWordIds, wordId, ptNodeParams.getProbability()); + return getWordAttributes(probability, ptNodeParams); + } + if (prevWordIds) { + const int probability = getProbabilityOfWord(prevWordIds, wordId); + if (probability != NOT_A_PROBABILITY) { + return getWordAttributes(probability, ptNodeParams); + } + } + return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY), + ptNodeParams); +} + +const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability, + const PtNodeParams &ptNodeParams) const { + return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), + ptNodeParams.getProbability() == 0); +} + int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { if (mHeaderPolicy->isDecayingDict()) { @@ -333,7 +360,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } bool addedNewBigram = false; const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); - if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(&prevWordPtNodePos), + if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos, bigramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; @@ -375,7 +402,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); if (mUpdatingHelper.removeNgramEntry( - PtNodePosArrayView::fromObject(&prevWordPtNodePos), wordPos)) { + PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos)) { mBigramCount--; return true; } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 562c219f4..871b556e1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -91,6 +91,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; + const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, + MultiBigramMap *const multiBigramMap) const; + int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; @@ -163,6 +166,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; int getWordIdFromTerminalPtNodePos(const int ptNodePos) const; int getTerminalPtNodePosFromWordId(const int wordId) const; + const WordAttributes getWordAttributes(const int probability, + const PtNodeParams &ptNodeParams) const; }; } // namespace v402 } // namespace backward diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index b2e60a837..c12fed324 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "utils/char_utils.h" +#include "utils/int_array_view.h" namespace latinime { @@ -174,11 +175,17 @@ class PtNodeParams { return mParentPos; } + AK_FORCE_INLINE const CodePointArrayView getCodePointArrayView() const { + return CodePointArrayView(mCodePoints, mCodePointCount); + } + + // TODO: Remove // Number of code points AK_FORCE_INLINE uint8_t getCodePointCount() const { return mCodePointCount; } + // TODO: Remove AK_FORCE_INLINE const int *getCodePoints() const { return mCodePoints; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index b36c6f4df..e76bae97c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -21,6 +21,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" +#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" @@ -281,6 +282,35 @@ int PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, return getWordIdFromTerminalPtNodePos(ptNodePos); } +const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(const int *const prevWordIds, + const int wordId, MultiBigramMap *const multiBigramMap) const { + if (wordId == NOT_A_WORD_ID) { + return WordAttributes(); + } + const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); + const PtNodeParams ptNodeParams = + mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); + if (multiBigramMap) { + const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */, + prevWordIds, wordId, ptNodeParams.getProbability()); + return getWordAttributes(probability, ptNodeParams); + } + if (prevWordIds) { + const int bigramProbability = getProbabilityOfWord(prevWordIds, wordId); + if (bigramProbability != NOT_A_PROBABILITY) { + return getWordAttributes(bigramProbability, ptNodeParams); + } + } + return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY), + ptNodeParams); +} + +const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability, + const PtNodeParams &ptNodeParams) const { + return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), + ptNodeParams.getProbability() == 0); +} + int PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { // Due to space constraints, the probability for bigrams is approximate - the lower the unigram @@ -377,11 +407,8 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod // Skip PtNodes don't start with Unicode code point because they represent non-word information. if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID; - childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId, - PatriciaTrieReadingUtils::hasChildrenInFlags(flags), - PatriciaTrieReadingUtils::isBlacklisted(flags) - || PatriciaTrieReadingUtils::isNotAWord(flags), - mergedNodeCodePointCount, mergedNodeCodePoints); + childDicNodes->pushLeavingChild(dicNode, childrenPos, wordId, + CodePointArrayView(mergedNodeCodePoints, mergedNodeCodePointCount)); } return siblingPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 66df52779..8c1665d7d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -66,6 +66,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; + const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, + MultiBigramMap *const multiBigramMap) const; + int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; @@ -160,6 +163,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DicNodeVector *const childDicNodes) const; int getWordIdFromTerminalPtNodePos(const int ptNodePos) const; int getTerminalPtNodePosFromWordId(const int wordId) const; + const WordAttributes getWordAttributes(const int probability, + const PtNodeParams &ptNodeParams) const; }; } // namespace latinime #endif // LATINIME_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index d5749e9eb..f54bb151a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -38,6 +38,40 @@ bool LanguageModelDictContent::runGC( 0 /* nextLevelBitmapEntryIndex */, outNgramCount); } +int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordIds, + const int wordId) const { + int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; + bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex(); + int maxLevel = 0; + for (size_t i = 0; i < prevWordIds.size(); ++i) { + const int nextBitmapEntryIndex = + mTrieMap.get(prevWordIds[i], bitmapEntryIndices[i]).mNextLevelBitmapEntryIndex; + if (nextBitmapEntryIndex == TrieMap::INVALID_INDEX) { + break; + } + maxLevel = i + 1; + bitmapEntryIndices[i + 1] = nextBitmapEntryIndex; + } + + for (int i = maxLevel; i >= 0; --i) { + const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndices[i]); + if (!result.mIsValid) { + continue; + } + const int probability = + ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo).getProbability(); + if (mHasHistoricalInfo) { + return std::min( + probability + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */), + MAX_PROBABILITY); + } else { + return probability; + } + } + // Cannot find the word. + return NOT_A_PROBABILITY; +} + ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry( const WordIdArrayView prevWordIds, const int wordId) const { const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h index aa612e35a..4e0b47036 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -128,6 +128,8 @@ class LanguageModelDictContent { const LanguageModelDictContent *const originalContent, int *const outNgramCount); + int getWordProbability(const WordIdArrayView prevWordIds, const int wordId) const; + ProbabilityEntry getProbabilityEntry(const int wordId) const { return getNgramProbabilityEntry(WordIdArrayView(), wordId); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index aca2f6cae..0472a453a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -20,6 +20,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" @@ -68,10 +69,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d } const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(), - ptNodeParams.getProbability(), wordId, ptNodeParams.hasChildren(), - ptNodeParams.isBlacklisted() - || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, - ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + wordId, ptNodeParams.getCodePointArrayView()); } if (readingHelper.isError()) { mIsCorrupted = true; @@ -112,6 +110,21 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, return ptNodeParams.getTerminalId(); } +const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( + const int *const prevWordIds, const int wordId, + MultiBigramMap *const multiBigramMap) const { + if (wordId == NOT_A_WORD_ID) { + return WordAttributes(); + } + const int ptNodePos = + mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); + const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); + // TODO: Support n-gram. + return WordAttributes(mBuffers->getLanguageModelDictContent()->getWordProbability( + WordIdArrayView::singleElementView(prevWordIds), wordId), ptNodeParams.isBlacklisted(), + ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0); +} + int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { if (mHeaderPolicy->isDecayingDict()) { @@ -143,7 +156,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, // TODO: Support n-gram. const ProbabilityEntry probabilityEntry = mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry( - IntArrayView::fromObject(prevWordIds), wordId); + IntArrayView::singleElementView(prevWordIds), wordId); if (!probabilityEntry.isValid()) { return NOT_A_PROBABILITY; } @@ -171,7 +184,7 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordIds, // TODO: Support n-gram. const auto languageModelDictContent = mBuffers->getLanguageModelDictContent(); for (const auto entry : languageModelDictContent->getProbabilityEntries( - WordIdArrayView::fromObject(prevWordIds))) { + WordIdArrayView::singleElementView(prevWordIds))) { const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry(); const int probability = probabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability( @@ -488,7 +501,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( // Fetch bigram information. // TODO: Support n-gram. std::vector<BigramProperty> bigrams; - const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId); + const WordIdArrayView prevWordIds = WordIdArrayView::singleElementView(&wordId); int bigramWord1CodePoints[MAX_WORD_LENGTH]; for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries( prevWordIds)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 0b8eec40b..980c16e4a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -68,6 +68,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; + const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, + MultiBigramMap *const multiBigramMap) const; + int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 9910777b8..313eb6b64 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -48,6 +48,11 @@ class ForgettingCurveUtils { static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy); + // TODO: Improve probability computation method and remove this. + static int getProbabilityBiasForNgram(const int n) { + return (n - 1) * MULTIPLIER_TWO_IN_PROBABILITY_SCALE; + } + AK_FORCE_INLINE static int getUnigramCountHardLimit(const int maxUnigramCount) { return static_cast<int>(static_cast<float>(maxUnigramCount) * UNIGRAM_COUNT_HARD_LIMIT_WEIGHT); diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index cb3dfac70..b64ee8be4 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -161,8 +161,8 @@ class TypingTraversal : public Traversal { return true; } - AK_FORCE_INLINE bool isGoodToTraverseNextWord(const DicNode *const dicNode) const { - const int probability = dicNode->getProbability(); + AK_FORCE_INLINE bool isGoodToTraverseNextWord(const DicNode *const dicNode, + const int probability) const { if (probability < ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY) { return false; } diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index c9c3b21d4..c39add9fe 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -17,8 +17,9 @@ #ifndef LATINIME_INT_ARRAY_VIEW_H #define LATINIME_INT_ARRAY_VIEW_H +#include <array> #include <cstdint> -#include <cstdlib> +#include <cstring> #include <vector> #include "defines.h" @@ -61,9 +62,9 @@ class IntArrayView { return IntArrayView(array, N); } - // Returns a view that points one int object. Does not take ownership of the given object. - AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) { - return IntArrayView(object, 1); + // Returns a view that points one int object. + AK_FORCE_INLINE static IntArrayView singleElementView(const int *const ptr) { + return IntArrayView(ptr, 1); } AK_FORCE_INLINE int operator[](const size_t index) const { @@ -103,6 +104,12 @@ class IntArrayView { return IntArrayView(mPtr + n, mSize - n); } + template <size_t N> + void copyToArray(std::array<int, N> *const buffer, const size_t offset) const { + ASSERT(mSize + offset <= N); + memmove(buffer->data() + offset, mPtr, sizeof(int) * mSize); + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp index ca8d56f27..e6f0353e3 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp @@ -26,28 +26,28 @@ namespace latinime { namespace { TEST(LanguageModelDictContentTest, TestUnigramProbability) { - LanguageModelDictContent LanguageModelDictContent(false /* useHistoricalInfo */); + LanguageModelDictContent languageModelDictContent(false /* useHistoricalInfo */); const int flag = 0xFF; const int probability = 10; const int wordId = 100; const ProbabilityEntry probabilityEntry(flag, probability); - LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); + languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); const ProbabilityEntry entry = - LanguageModelDictContent.getProbabilityEntry(wordId); + languageModelDictContent.getProbabilityEntry(wordId); EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(probability, entry.getProbability()); // Remove - EXPECT_TRUE(LanguageModelDictContent.removeProbabilityEntry(wordId)); - EXPECT_FALSE(LanguageModelDictContent.getProbabilityEntry(wordId).isValid()); - EXPECT_FALSE(LanguageModelDictContent.removeProbabilityEntry(wordId)); - EXPECT_TRUE(LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry)); - EXPECT_TRUE(LanguageModelDictContent.getProbabilityEntry(wordId).isValid()); + EXPECT_TRUE(languageModelDictContent.removeProbabilityEntry(wordId)); + EXPECT_FALSE(languageModelDictContent.getProbabilityEntry(wordId).isValid()); + EXPECT_FALSE(languageModelDictContent.removeProbabilityEntry(wordId)); + EXPECT_TRUE(languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry)); + EXPECT_TRUE(languageModelDictContent.getProbabilityEntry(wordId).isValid()); } TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) { - LanguageModelDictContent LanguageModelDictContent(true /* useHistoricalInfo */); + LanguageModelDictContent languageModelDictContent(true /* useHistoricalInfo */); const int flag = 0xF0; const int timestamp = 0x3FFFFFFF; @@ -56,19 +56,19 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) { const int wordId = 100; const HistoricalInfo historicalInfo(timestamp, level, count); const ProbabilityEntry probabilityEntry(flag, &historicalInfo); - LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); - const ProbabilityEntry entry = LanguageModelDictContent.getProbabilityEntry(wordId); + languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); + const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId); EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp()); EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel()); EXPECT_EQ(count, entry.getHistoricalInfo()->getCount()); // Remove - EXPECT_TRUE(LanguageModelDictContent.removeProbabilityEntry(wordId)); - EXPECT_FALSE(LanguageModelDictContent.getProbabilityEntry(wordId).isValid()); - EXPECT_FALSE(LanguageModelDictContent.removeProbabilityEntry(wordId)); - EXPECT_TRUE(LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry)); - EXPECT_TRUE(LanguageModelDictContent.removeProbabilityEntry(wordId)); + EXPECT_TRUE(languageModelDictContent.removeProbabilityEntry(wordId)); + EXPECT_FALSE(languageModelDictContent.getProbabilityEntry(wordId).isValid()); + EXPECT_FALSE(languageModelDictContent.removeProbabilityEntry(wordId)); + EXPECT_TRUE(languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry)); + EXPECT_TRUE(languageModelDictContent.removeProbabilityEntry(wordId)); } TEST(LanguageModelDictContentTest, TestIterateProbabilityEntry) { @@ -89,5 +89,31 @@ TEST(LanguageModelDictContentTest, TestIterateProbabilityEntry) { EXPECT_TRUE(wordIdSet.empty()); } +TEST(LanguageModelDictContentTest, TestGetWordProbability) { + LanguageModelDictContent languageModelDictContent(false /* useHistoricalInfo */); + + const int flag = 0xFF; + const int probability = 10; + const int bigramProbability = 20; + const int trigramProbability = 30; + const int wordId = 100; + const int prevWordIdArray[] = { 1, 2 }; + const WordIdArrayView prevWordIds = WordIdArrayView::fromFixedSizeArray(prevWordIdArray); + + const ProbabilityEntry probabilityEntry(flag, probability); + languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); + const ProbabilityEntry bigramProbabilityEntry(flag, bigramProbability); + languageModelDictContent.setProbabilityEntry(prevWordIds[0], &probabilityEntry); + languageModelDictContent.setNgramProbabilityEntry(prevWordIds.limit(1), wordId, + &bigramProbabilityEntry); + EXPECT_EQ(bigramProbability, languageModelDictContent.getWordProbability(prevWordIds, wordId)); + const ProbabilityEntry trigramProbabilityEntry(flag, trigramProbability); + languageModelDictContent.setNgramProbabilityEntry(prevWordIds.limit(1), + prevWordIds[1], &probabilityEntry); + languageModelDictContent.setNgramProbabilityEntry(prevWordIds.limit(2), wordId, + &trigramProbabilityEntry); + EXPECT_EQ(trigramProbability, languageModelDictContent.getWordProbability(prevWordIds, wordId)); +} + } // namespace } // namespace latinime diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp new file mode 100644 index 000000000..15f560cd1 --- /dev/null +++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/format_utils_test.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/format_utils.h" + +#include <gtest/gtest.h> + +#include <vector> + +#include "utils/byte_array_view.h" + +namespace latinime { +namespace { + +TEST(FormatUtilsTest, TestMagicNumber) { + EXPECT_EQ(0x9BC13AFE, FormatUtils::MAGIC_NUMBER) << "Magic number must not be changed."; +} + +const std::vector<uint8_t> getBuffer(const int magicNumber, const int version, const uint16_t flags, + const size_t headerSize) { + std::vector<uint8_t> buffer; + buffer.push_back(magicNumber >> 24); + buffer.push_back(magicNumber >> 16); + buffer.push_back(magicNumber >> 8); + buffer.push_back(magicNumber); + + buffer.push_back(version >> 8); + buffer.push_back(version); + + buffer.push_back(flags >> 8); + buffer.push_back(flags); + + buffer.push_back(headerSize >> 24); + buffer.push_back(headerSize >> 16); + buffer.push_back(headerSize >> 8); + buffer.push_back(headerSize); + return buffer; +} + +TEST(FormatUtilsTest, TestDetectFormatVersion) { + EXPECT_EQ(FormatUtils::UNKNOWN_VERSION, + FormatUtils::detectFormatVersion(ReadOnlyByteArrayView())); + + { + const std::vector<uint8_t> buffer = + getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_2, 0, 0); + EXPECT_EQ(FormatUtils::VERSION_2, FormatUtils::detectFormatVersion( + ReadOnlyByteArrayView(buffer.data(), buffer.size()))); + } + { + const std::vector<uint8_t> buffer = + getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4, 0, 0); + EXPECT_EQ(FormatUtils::VERSION_4, FormatUtils::detectFormatVersion( + ReadOnlyByteArrayView(buffer.data(), buffer.size()))); + } + { + const std::vector<uint8_t> buffer = + getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_4_DEV, 0, 0); + EXPECT_EQ(FormatUtils::VERSION_4_DEV, FormatUtils::detectFormatVersion( + ReadOnlyByteArrayView(buffer.data(), buffer.size()))); + } + + { + const std::vector<uint8_t> buffer = + getBuffer(FormatUtils::MAGIC_NUMBER - 1, FormatUtils::VERSION_2, 0, 0); + EXPECT_EQ(FormatUtils::UNKNOWN_VERSION, FormatUtils::detectFormatVersion( + ReadOnlyByteArrayView(buffer.data(), buffer.size()))); + } + { + const std::vector<uint8_t> buffer = + getBuffer(FormatUtils::MAGIC_NUMBER, 100, 0, 0); + EXPECT_EQ(FormatUtils::UNKNOWN_VERSION, FormatUtils::detectFormatVersion( + ReadOnlyByteArrayView(buffer.data(), buffer.size()))); + } + { + const std::vector<uint8_t> buffer = + getBuffer(FormatUtils::MAGIC_NUMBER, FormatUtils::VERSION_2, 0, 0); + EXPECT_EQ(FormatUtils::UNKNOWN_VERSION, FormatUtils::detectFormatVersion( + ReadOnlyByteArrayView(buffer.data(), buffer.size() - 1))); + } +} + +} // namespace +} // namespace latinime diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp index 161df2f43..ec57cf59c 100644 --- a/native/jni/tests/utils/int_array_view_test.cpp +++ b/native/jni/tests/utils/int_array_view_test.cpp @@ -18,6 +18,7 @@ #include <gtest/gtest.h> +#include <array> #include <vector> namespace latinime { @@ -52,7 +53,7 @@ TEST(IntArrayViewTest, TestConstructFromArray) { TEST(IntArrayViewTest, TestConstructFromObject) { const int object = 10; - const auto intArrayView = IntArrayView::fromObject(&object); + const auto intArrayView = IntArrayView::singleElementView(&object); EXPECT_EQ(1u, intArrayView.size()); EXPECT_EQ(object, intArrayView[0]); } @@ -89,5 +90,26 @@ TEST(IntArrayViewTest, TestSkip) { } } +TEST(IntArrayViewTest, TestCopyToArray) { + // "{{" to suppress warning. + std::array<int, 7> buffer = {{10, 20, 30, 40, 50, 60, 70}}; + const std::vector<int> intVector = {3, 2, 1, 0, -1, -2}; + IntArrayView intArrayView(intVector); + intArrayView.limit(0).copyToArray(&buffer, 0); + EXPECT_EQ(10, buffer[0]); + EXPECT_EQ(20, buffer[1]); + intArrayView.limit(1).copyToArray(&buffer, 0); + EXPECT_EQ(intVector[0], buffer[0]); + EXPECT_EQ(20, buffer[1]); + intArrayView.limit(1).copyToArray(&buffer, 1); + EXPECT_EQ(intVector[0], buffer[0]); + EXPECT_EQ(intVector[0], buffer[1]); + intArrayView.copyToArray(&buffer, 0); + for (size_t i = 0; i < intArrayView.size(); ++i) { + EXPECT_EQ(intVector[i], buffer[i]); + } + EXPECT_EQ(70, buffer[6]); +} + } // namespace } // namespace latinime diff --git a/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java index e9a97ff92..4e7e8140a 100644 --- a/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java @@ -29,6 +29,7 @@ import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.ExpandableBinaryDictionary; +import com.android.inputmethod.latin.RichInputMethodManager; import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; @@ -36,6 +37,7 @@ import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; +import android.view.inputmethod.InputMethodSubtype; /** * Unit tests for personalization dictionary @@ -55,16 +57,28 @@ public class PersonalizationDictionaryTests extends AndroidTestCase { final DictionaryFacilitator dictionaryFacilitator = new DictionaryFacilitator(getContext()); dictionaryFacilitator.resetDictionariesForTesting(getContext(), LOCALE_EN_US, dictTypes, new HashMap<String, File>(), new HashMap<String, Map<String, String>>()); + // Set subtypes. + RichInputMethodManager.init(getContext()); + final RichInputMethodManager richImm = RichInputMethodManager.getInstance(); + final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>(); + subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet( + LOCALE_EN_US.toString(), "qwerty")); + dictionaryFacilitator.updateEnabledSubtypes(subtypes); return dictionaryFacilitator; } public void testAddManyTokens() { final DictionaryFacilitator dictionaryFacilitator = getDictionaryFacilitator(); dictionaryFacilitator.clearPersonalizationDictionary(); - final int dataChunkCount = 20; - final int wordCountInOneChunk = 2000; + final int dataChunkCount = 100; + final int wordCountInOneChunk = 200; + final int uniqueWordCount = 100; final Random random = new Random(System.currentTimeMillis()); final int[] codePointSet = CodePointUtils.LATIN_ALPHABETS_LOWER; + final ArrayList<String> words = new ArrayList<>(); + for (int i = 0; i < uniqueWordCount; i++) { + words.add(CodePointUtils.generateWord(random, codePointSet)); + } final SpacingAndPunctuations spacingAndPunctuations = new SpacingAndPunctuations(getContext().getResources()); @@ -75,7 +89,7 @@ public class PersonalizationDictionaryTests extends AndroidTestCase { for (int i = 0; i < dataChunkCount; i++) { final ArrayList<String> tokens = new ArrayList<>(); for (int j = 0; j < wordCountInOneChunk; j++) { - tokens.add(CodePointUtils.generateWord(random, codePointSet)); + tokens.add(words.get(random.nextInt(words.size()))); } final PersonalizationDataChunk personalizationDataChunk = new PersonalizationDataChunk( true /* inputByUser */, tokens, timeStampInSeconds, DUMMY_PACKAGE_NAME, diff --git a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java index af22fb8b9..5fbd36ac7 100644 --- a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java +++ b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.android.inputmethod.latin; +package com.android.inputmethod.latin.utils; import java.util.ArrayList; import java.util.Locale; @@ -24,24 +24,22 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.view.inputmethod.InputMethodSubtype; -import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatchesAndSuggestions; +import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.RichInputMethodManager; +import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; /** * Unit test for DistracterFilter */ @LargeTest public class DistracterFilterTest extends AndroidTestCase { - private DictionaryFacilitatorLruCache mDictionaryFacilitatorLruCache; private DistracterFilterCheckingExactMatchesAndSuggestions mDistracterFilter; @Override protected void setUp() throws Exception { super.setUp(); final Context context = getContext(); - mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context, - 2 /* maxSize */, "" /* dictionaryNamePrefix */); - mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context, - mDictionaryFacilitatorLruCache); + mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context); RichInputMethodManager.init(context); final RichInputMethodManager richImm = RichInputMethodManager.getInstance(); final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>(); @@ -56,7 +54,7 @@ public class DistracterFilterTest extends AndroidTestCase { @Override protected void tearDown() { - mDictionaryFacilitatorLruCache.evictAll(); + mDistracterFilter.close(); } public void testIsDistractorToWordsInDictionaries() { @@ -203,4 +201,25 @@ public class DistracterFilterTest extends AndroidTestCase { assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries( EMPTY_PREV_WORDS_INFO, typedWord, localeFrFr)); } + + public void testGetWordHandlingType() { + final Locale localeEnUs = new Locale("en", "US"); + final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + int handlingType = 0; + + handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, + "this", localeEnUs); + assertFalse(HandlingType.shouldBeLowerCased(handlingType)); + assertFalse(HandlingType.shouldBeHandledAsOov(handlingType)); + + handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, + "This", localeEnUs); + assertTrue(HandlingType.shouldBeLowerCased(handlingType)); + assertFalse(HandlingType.shouldBeHandledAsOov(handlingType)); + + handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, + "thibk", localeEnUs); + assertFalse(HandlingType.shouldBeLowerCased(handlingType)); + assertTrue(HandlingType.shouldBeHandledAsOov(handlingType)); + } } |