diff options
69 files changed, 836 insertions, 457 deletions
diff --git a/java/res/drawable-hdpi/sym_keyboard_smiley_holo_dark.png b/java/res/drawable-hdpi/sym_keyboard_smiley_holo_dark.png Binary files differnew file mode 100644 index 000000000..cfacbc2e7 --- /dev/null +++ b/java/res/drawable-hdpi/sym_keyboard_smiley_holo_dark.png diff --git a/java/res/drawable-mdpi/sym_keyboard_smiley_holo_dark.png b/java/res/drawable-mdpi/sym_keyboard_smiley_holo_dark.png Binary files differindex 71272bb88..067ad5496 100644 --- a/java/res/drawable-mdpi/sym_keyboard_smiley_holo_dark.png +++ b/java/res/drawable-mdpi/sym_keyboard_smiley_holo_dark.png diff --git a/java/res/drawable-xhdpi/sym_keyboard_smiley_holo_dark.png b/java/res/drawable-xhdpi/sym_keyboard_smiley_holo_dark.png Binary files differindex 686831fd3..e6baa2e59 100644 --- a/java/res/drawable-xhdpi/sym_keyboard_smiley_holo_dark.png +++ b/java/res/drawable-xhdpi/sym_keyboard_smiley_holo_dark.png diff --git a/java/res/drawable-xxhdpi/sym_keyboard_smiley_holo_dark.png b/java/res/drawable-xxhdpi/sym_keyboard_smiley_holo_dark.png Binary files differindex 04b721617..5973ac355 100644 --- a/java/res/drawable-xxhdpi/sym_keyboard_smiley_holo_dark.png +++ b/java/res/drawable-xxhdpi/sym_keyboard_smiley_holo_dark.png diff --git a/java/res/layout/emoji_keyboard_tab_icon.xml b/java/res/layout/emoji_keyboard_tab_icon.xml index d79276eb9..1609f6a26 100644 --- a/java/res/layout/emoji_keyboard_tab_icon.xml +++ b/java/res/layout/emoji_keyboard_tab_icon.xml @@ -23,4 +23,5 @@ android:layout_weight="1.0" android:layout_height="wrap_content" android:gravity="center" + android:scaleType="center" /> diff --git a/java/res/values-v19/emoji-categories.xml b/java/res/values-v19/emoji-categories.xml index 658bbfa83..0d5aa1aa1 100644 --- a/java/res/values-v19/emoji-categories.xml +++ b/java/res/values-v19/emoji-categories.xml @@ -204,18 +204,18 @@ name="emoji_symbols" format="string" > - <item>fe82e|0031,20e3</item> - <item>fe82f|0032,20e3</item> - <item>fe830|0033,20e3</item> - <item>fe831|0034,20e3</item> - 
<item>fe832|0035,20e3</item> - <item>fe833|0036,20e3</item> - <item>fe834|0037,20e3</item> - <item>fe835|0038,20e3</item> - <item>fe836|0039,20e3</item> - <item>fe837|0030,20e3</item> + <item>fe82e|0031,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe82f|0032,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe830|0033,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe831|0034,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe832|0035,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe833|0036,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe834|0037,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe835|0038,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe836|0039,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe837|0030,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> <item>1f51f</item> - <item>fe82c|0023,20e3</item> + <item>fe82c|0023,20e3|99</item> <!-- TODO: fix support min sdk version (99) --> <item>1f51d</item> <item>1f519</item> <item>1f51b</item> @@ -875,16 +875,16 @@ <item>1f48e</item> <item>1f490</item> <item>1f492</item> - <item>fe4e5|1f1ef,1f1f5</item> - <item>fe4e6|1f1fa,1f1f8</item> - <item>fe4e7|1f1eb,1f1f7</item> - <item>fe4e8|1f1e9,1f1ea</item> - <item>fe4e9|1f1ee,1f1f9</item> - <item>fe4ea|1f1ec,1f1e7</item> - <item>fe4eb|1f1ea,1f1f8</item> - <item>fe4ec|1f1f7,1f1fa</item> - <item>fe4ed|1f1e8,1f1f3</item> - <item>fe4ee|1f1f0,1f1f7</item> + <item>fe4e5|1f1ef,1f1f5|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4e6|1f1fa,1f1f8|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4e7|1f1eb,1f1f7|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4e8|1f1e9,1f1ea|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4e9|1f1ee,1f1f9|99</item> <!-- TODO: fix support 
min sdk version (99) --> + <item>fe4ea|1f1ec,1f1e7|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4eb|1f1ea,1f1f8|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4ec|1f1f7,1f1fa|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4ed|1f1e8,1f1f3|99</item> <!-- TODO: fix support min sdk version (99) --> + <item>fe4ee|1f1f0,1f1f7|99</item> <!-- TODO: fix support min sdk version (99) --> </array> <array name="emoji_emoticons" diff --git a/java/res/xml-sw600dp/keys_comma_period.xml b/java/res/xml-sw600dp/keys_comma_period.xml new file mode 100644 index 000000000..7604e033d --- /dev/null +++ b/java/res/xml-sw600dp/keys_comma_period.xml @@ -0,0 +1,103 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2013, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> + +<merge + xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin" +> + <switch> + <case + latin:mode="email|url" + > + <Key + latin:keyLabel="," + latin:keyHintLabel="-" + latin:moreKeys="-" + latin:backgroundType="functional" + latin:keyStyle="hasShiftedLetterHintStyle" /> + <Key + latin:keyLabel="." 
+ latin:keyHintLabel="_" + latin:moreKeys="_" + latin:backgroundType="functional" + latin:keyStyle="hasShiftedLetterHintStyle" /> + </case> + <case + latin:languageCode="ar" + > + <Key + latin:keyLabel="!text/keylabel_for_apostrophe" + latin:keyHintLabel="!text/keyhintlabel_for_apostrophe" + latin:moreKeys="!text/more_keys_for_apostrophe" + latin:backgroundType="functional" + latin:keyStyle="hasShiftedLetterHintStyle" /> + <Key + latin:keyLabel="." + latin:keyHintLabel="!text/keyhintlabel_for_arabic_diacritics" + latin:keyLabelFlags="hasPopupHint" + latin:moreKeys="!text/more_keys_for_arabic_diacritics" + latin:backgroundType="functional" + latin:keyStyle="hasShiftedLetterHintStyle" /> + </case> + <case + latin:languageCode="fa" + > + <Key + latin:keyLabel="!text/keylabel_for_apostrophe" + latin:keyHintLabel="!text/keyhintlabel_for_apostrophe" + latin:keyLabelFlags="hasPopupHint" + latin:moreKeys="!text/more_keys_for_apostrophe" + latin:backgroundType="functional" + latin:keyStyle="hasShiftedLetterHintStyle" /> + <Key + latin:keyLabel="." + latin:keyHintLabel="!text/keyhintlabel_for_arabic_diacritics" + latin:keyLabelFlags="hasPopupHint" + latin:moreKeys="!text/more_keys_for_arabic_diacritics" + latin:backgroundType="functional" + latin:keyStyle="hasShiftedLetterHintStyle" /> + </case> + <case + latin:languageCode="hy" + > + <!-- U+055D: "՝" ARMENIAN COMMA --> + <Key + latin:keyLabel="՝" + latin:backgroundType="functional" /> + <!-- U+0589: "։" ARMENIAN FULL STOP --> + <Key + latin:keyLabel="։" + latin:keyLabelFlags="hasPopupHint" + latin:backgroundType="functional" + latin:moreKeys="!text/more_keys_for_punctuation" /> + </case> + <default> + <Key + latin:keyLabel="!text/keylabel_for_tablet_comma" + latin:keyHintLabel="!text/keyhintlabel_for_tablet_comma" + latin:backgroundType="functional" + latin:moreKeys="!text/more_keys_for_tablet_comma" /> + <Key + latin:keyLabel="." 
+ latin:keyHintLabel="!text/keyhintlabel_for_period" + latin:backgroundType="functional" + latin:moreKeys="!text/more_keys_for_period" /> + </default> + </switch> +</merge> diff --git a/java/res/xml-sw600dp/keys_exclamation_question.xml b/java/res/xml-sw600dp/keys_exclamation_question.xml index 983ef3897..cd38282ee 100644 --- a/java/res/xml-sw600dp/keys_exclamation_question.xml +++ b/java/res/xml-sw600dp/keys_exclamation_question.xml @@ -22,7 +22,7 @@ xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin" > <Key - latin:keyLabel="\?" /> - <Key latin:keyLabel="!" /> + <Key + latin:keyLabel="\?" /> </merge> diff --git a/java/res/xml/keys_comma_period.xml b/java/res/xml/keys_comma_period.xml index 02b46c23a..1b51e45ed 100644 --- a/java/res/xml/keys_comma_period.xml +++ b/java/res/xml/keys_comma_period.xml @@ -23,22 +23,6 @@ > <switch> <case - latin:mode="email|url" - > - <Key - latin:keyLabel="." - latin:keyHintLabel="_" - latin:moreKeys="_" - latin:backgroundType="functional" - latin:keyStyle="hasShiftedLetterHintStyle" /> - <Key - latin:keyLabel="," - latin:keyHintLabel="-" - latin:moreKeys="-" - latin:backgroundType="functional" - latin:keyStyle="hasShiftedLetterHintStyle" /> - </case> - <case latin:languageCode="ar" > <Key @@ -76,28 +60,28 @@ <case latin:languageCode="hy" > + <!-- U+055D: "՝" ARMENIAN COMMA --> + <Key + latin:keyLabel="՝" + latin:backgroundType="functional" /> <!-- U+0589: "։" ARMENIAN FULL STOP --> <Key latin:keyLabel="։" latin:keyLabelFlags="hasPopupHint" latin:backgroundType="functional" latin:moreKeys="!text/more_keys_for_punctuation" /> - <!-- U+055D: "՝" ARMENIAN COMMA --> - <Key - latin:keyLabel="՝" - latin:backgroundType="functional" /> </case> <default> <Key - latin:keyLabel="." 
- latin:keyHintLabel="!text/keyhintlabel_for_tablet_period" - latin:backgroundType="functional" - latin:moreKeys="!text/more_keys_for_tablet_period" /> - <Key latin:keyLabel="!text/keylabel_for_tablet_comma" latin:keyHintLabel="!text/keyhintlabel_for_tablet_comma" latin:backgroundType="functional" latin:moreKeys="!text/more_keys_for_tablet_comma" /> + <Key + latin:keyLabel="." + latin:keyHintLabel="!text/keyhintlabel_for_period" + latin:backgroundType="functional" + latin:moreKeys="!text/more_keys_for_period" /> </default> </switch> </merge> diff --git a/java/res/xml/rowkeys_symbols3.xml b/java/res/xml/rowkeys_symbols3.xml index 9f5e620e6..074078cb6 100644 --- a/java/res/xml/rowkeys_symbols3.xml +++ b/java/res/xml/rowkeys_symbols3.xml @@ -54,9 +54,9 @@ latin:keyLabel="!text/keylabel_for_symbols_semicolon" latin:moreKeys="!text/more_keys_for_symbols_semicolon" /> <Key - latin:keyLabel="!text/keylabel_for_symbols_question" - latin:moreKeys="!text/more_keys_for_symbols_question" /> - <Key latin:keyLabel="!" latin:moreKeys="!text/more_keys_for_symbols_exclamation" /> + <Key + latin:keyLabel="!text/keylabel_for_symbols_question" + latin:moreKeys="!text/more_keys_for_symbols_question" /> </merge> diff --git a/java/src/com/android/inputmethod/dictionarypack/DictionarySettingsActivity.java b/java/src/com/android/inputmethod/dictionarypack/DictionarySettingsActivity.java index 684165240..c28d72949 100644 --- a/java/src/com/android/inputmethod/dictionarypack/DictionarySettingsActivity.java +++ b/java/src/com/android/inputmethod/dictionarypack/DictionarySettingsActivity.java @@ -24,6 +24,8 @@ import android.preference.PreferenceActivity; * Preference screen. 
*/ public final class DictionarySettingsActivity extends PreferenceActivity { + private static final String DEFAULT_FRAGMENT = DictionarySettingsFragment.class.getName(); + @Override protected void onCreate(final Bundle savedInstanceState) { super.onCreate(savedInstanceState); @@ -32,11 +34,17 @@ public final class DictionarySettingsActivity extends PreferenceActivity { @Override public Intent getIntent() { final Intent modIntent = new Intent(super.getIntent()); - modIntent.putExtra(EXTRA_SHOW_FRAGMENT, DictionarySettingsFragment.class.getName()); + modIntent.putExtra(EXTRA_SHOW_FRAGMENT, DEFAULT_FRAGMENT); modIntent.putExtra(EXTRA_NO_HEADERS, true); // Important note : the original intent should contain a String extra with the key // DictionarySettingsFragment.DICT_SETTINGS_FRAGMENT_CLIENT_ID_ARGUMENT so that the // fragment can know who the client is. return modIntent; } + + // TODO: Uncomment the override annotation once we start using SDK version 19. + // @Override + public boolean isValidFragment(String fragmentName) { + return fragmentName.equals(DEFAULT_FRAGMENT); + } } diff --git a/java/src/com/android/inputmethod/keyboard/EmojiCategoryPageIndicatorView.java b/java/src/com/android/inputmethod/keyboard/EmojiCategoryPageIndicatorView.java index fed134eb9..e23131a30 100644 --- a/java/src/com/android/inputmethod/keyboard/EmojiCategoryPageIndicatorView.java +++ b/java/src/com/android/inputmethod/keyboard/EmojiCategoryPageIndicatorView.java @@ -50,8 +50,9 @@ public class EmojiCategoryPageIndicatorView extends LinearLayout { @Override protected void onDraw(Canvas canvas) { - if (mCategoryPageSize == 0) { - // If the category is not set yet, just clear and return. + if (mCategoryPageSize <= 1) { + // If the category is not set yet or contains only one category, + // just clear and return. 
canvas.drawColor(0); return; } diff --git a/java/src/com/android/inputmethod/keyboard/EmojiLayoutParams.java b/java/src/com/android/inputmethod/keyboard/EmojiLayoutParams.java index 267fad5cd..71790b7d6 100644 --- a/java/src/com/android/inputmethod/keyboard/EmojiLayoutParams.java +++ b/java/src/com/android/inputmethod/keyboard/EmojiLayoutParams.java @@ -75,9 +75,7 @@ public class EmojiLayoutParams { public void setActionBarProperties(LinearLayout ll) { final LinearLayout.LayoutParams lp = (LinearLayout.LayoutParams) ll.getLayoutParams(); - lp.height = mEmojiActionBarHeight; - lp.topMargin = 0; - lp.bottomMargin = mBottomPadding; + lp.height = mEmojiActionBarHeight - mBottomPadding; ll.setLayoutParams(lp); } diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java b/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java index 74edd87cf..ad6e2c0f2 100644 --- a/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java +++ b/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java @@ -155,7 +155,7 @@ public final class KeyboardSwitcher implements KeyboardState.SwitchActions { } public void saveKeyboardState() { - if (getKeyboard() != null) { + if (getKeyboard() != null || isShowingEmojiKeyboard()) { mState.onSaveKeyboardState(); } } diff --git a/java/src/com/android/inputmethod/keyboard/PointerTracker.java b/java/src/com/android/inputmethod/keyboard/PointerTracker.java index ee4ac950c..52f190e77 100644 --- a/java/src/com/android/inputmethod/keyboard/PointerTracker.java +++ b/java/src/com/android/inputmethod/keyboard/PointerTracker.java @@ -823,14 +823,16 @@ public final class PointerTracker implements PointerTrackerQueue.Element { final int size = sAggregratedPointers.getPointerSize(); if (size > sLastRecognitionPointSize && stroke.hasRecognitionTimePast(eventTime, sLastRecognitionTime)) { - sLastRecognitionPointSize = size; - sLastRecognitionTime = eventTime; if (DEBUG_LISTENER) { Log.d(TAG, String.format("[%d] 
onUpdateBatchInput: batchPoints=%d", mPointerId, size)); } mTimerProxy.startUpdateBatchInputTimer(this); mListener.onUpdateBatchInput(sAggregratedPointers); + // The listener may change the size of the pointers (when auto-committing + // for example), so we need to get the size from the pointers again. + sLastRecognitionPointSize = sAggregratedPointers.getPointerSize(); + sLastRecognitionTime = eventTime; } } } diff --git a/java/src/com/android/inputmethod/keyboard/internal/CodesArrayParser.java b/java/src/com/android/inputmethod/keyboard/internal/CodesArrayParser.java index c10fdbace..4ccecb2f0 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/CodesArrayParser.java +++ b/java/src/com/android/inputmethod/keyboard/internal/CodesArrayParser.java @@ -18,6 +18,8 @@ package com.android.inputmethod.keyboard.internal; import com.android.inputmethod.latin.Constants; +import android.text.TextUtils; + /** * The string parser of codesArray specification for <GridRows />. The attribute codesArray is an * array of string. @@ -34,7 +36,7 @@ import com.android.inputmethod.latin.Constants; public final class CodesArrayParser { // Constants for parsing. private static final char COMMA = ','; - private static final char VERTICAL_BAR = '|'; + private static final String VERTICAL_BAR_STRING = "\\|"; private static final String COMMA_STRING = ","; private static final int BASE_HEX = 16; @@ -43,8 +45,11 @@ public final class CodesArrayParser { } private static String getLabelSpec(final String codesArraySpec) { - final int pos = codesArraySpec.indexOf(VERTICAL_BAR); - return (pos < 0) ? 
codesArraySpec : codesArraySpec.substring(0, pos); + final String[] strs = codesArraySpec.split(VERTICAL_BAR_STRING, -1); + if (strs.length <= 1) { + return codesArraySpec; + } + return strs[0]; } public static String parseLabel(final String codesArraySpec) { @@ -58,8 +63,25 @@ public final class CodesArrayParser { } private static String getCodeSpec(final String codesArraySpec) { - final int pos = codesArraySpec.indexOf(VERTICAL_BAR); - return (pos < 0) ? codesArraySpec : codesArraySpec.substring(pos + 1); + final String[] strs = codesArraySpec.split(VERTICAL_BAR_STRING, -1); + if (strs.length <= 1) { + return codesArraySpec; + } + return TextUtils.isEmpty(strs[1]) ? strs[0] : strs[1]; + } + + // codesArraySpec consists of: + // <label>|<code0>,<code1>,...|<minSupportSdkVersion> + public static int getMinSupportSdkVersion(final String codesArraySpec) { + final String[] strs = codesArraySpec.split(VERTICAL_BAR_STRING, -1); + if (strs.length <= 2) { + return 0; + } + try { + return Integer.parseInt(strs[2]); + } catch (NumberFormatException e) { + return 0; + } } public static int parseCode(final String codesArraySpec) { diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java index 22f7a83fc..c1ae65695 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardBuilder.java @@ -20,6 +20,7 @@ import android.content.Context; import android.content.res.Resources; import android.content.res.TypedArray; import android.content.res.XmlResourceParser; +import android.os.Build; import android.util.AttributeSet; import android.util.Log; import android.util.TypedValue; @@ -436,17 +437,24 @@ public class KeyboardBuilder<KP extends KeyboardParams> { final String label; final int code; final String outputText; + final int supportedMinSdkVersion; if (codesArrayId != 0) { final String 
codeArraySpec = array[i]; label = CodesArrayParser.parseLabel(codeArraySpec); code = CodesArrayParser.parseCode(codeArraySpec); outputText = CodesArrayParser.parseOutputText(codeArraySpec); + supportedMinSdkVersion = + CodesArrayParser.getMinSupportSdkVersion(codeArraySpec); } else { final String textArraySpec = array[i]; // TODO: Utilize KeySpecParser or write more generic TextsArrayParser. label = textArraySpec; code = Constants.CODE_OUTPUT_TEXT; outputText = textArraySpec + (char)Constants.CODE_SPACE; + supportedMinSdkVersion = 0; + } + if (Build.VERSION.SDK_INT < supportedMinSdkVersion) { + continue; } final int x = (int)row.getKeyX(null); final int y = row.getKeyY(); diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java index 9f9fdaa6f..506dfa751 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java @@ -178,6 +178,8 @@ public final class KeyboardState { if (!state.mIsAlphabetShiftLocked) { setShifted(state.mShiftMode); } + // TODO: is this the right place to do this? Should we do this in setShift* instead? 
+ mSwitchActions.requestUpdatingShiftState(); } else { mPrevMainKeyboardWasShiftLocked = state.mIsAlphabetShiftLocked; } diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java index 67553fb75..684cf632b 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java @@ -209,8 +209,8 @@ public final class KeyboardTextsSet { /* 104 */ "keylabel_for_tablet_comma", /* 105 */ "keyhintlabel_for_tablet_comma", /* 106 */ "more_keys_for_tablet_comma", - /* 107 */ "keyhintlabel_for_tablet_period", - /* 108 */ "more_keys_for_tablet_period", + /* 107 */ "keyhintlabel_for_period", + /* 108 */ "more_keys_for_period", /* 109 */ "keylabel_for_apostrophe", /* 110 */ "keyhintlabel_for_apostrophe", /* 111 */ "more_keys_for_apostrophe", diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index d9bad7e57..541e69788 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -44,9 +44,9 @@ public final class BinaryDictionary extends Dictionary { private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; // Must be equal to MAX_RESULTS in native/jni/src/defines.h private static final int MAX_RESULTS = 18; - // Required space count for auto commit. - // TODO: Remove this heuristic. - private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3; + // The cutoff returned by native for auto-commit confidence. 
+ // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h + private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; @UsedForTesting public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; @@ -343,18 +343,7 @@ public final class BinaryDictionary extends Dictionary { @Override public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { - // TODO: actually use the confidence rather than use this completely broken heuristic - final String word = candidate.mWord; - final int length = word.length(); - int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT; - for (int i = 0; i < length; ++i) { - // This is okay because no low-surrogate and no high-surrogate can ever match the - // space character, so we don't need to take care of iterating on code points. - if (Constants.CODE_SPACE == word.charAt(i)) { - if (0 >= --remainingSpaces) return true; - } - } - return false; + return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT; } @Override diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 5b7e9351a..0f3d28976 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -2932,6 +2932,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen return; } tryFixLyingCursorPosition(); + mKeyboardSwitcher.updateShiftState(); if (tryResumeSuggestions) mHandler.postResumeSuggestions(); } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 665c7a27c..2c3d1346f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -295,7 +295,6 @@ public final class BinaryDictDecoderUtils { return address; } } - int address; switch 
(optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: return dictBuffer.readUnsignedByte(); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index af61f2979..b6024243f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -278,7 +278,6 @@ public class BinaryDictEncoderUtils { // For future reference, the code to remove duplicate is a simple : list.remove(node); list.add(ptNodeArray); final ArrayList<PtNode> branches = ptNodeArray.mData; - final int nodeSize = branches.size(); for (PtNode ptNode : branches) { if (null != ptNode.mChildren) flattenTreeInner(list, ptNode.mChildren); } @@ -427,9 +426,6 @@ public class BinaryDictEncoderUtils { nodeCountSize + nodeArrayOffset + nodeffset; nodeffset += ptNode.mCachedSize; } - final int nodeSize = nodeCountSize + nodeffset - + (formatOptions.mSupportsDynamicUpdate - ? 
FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); nodeArrayOffset += nodeArray.mCachedSize; } return nodeArrayOffset; @@ -653,8 +649,8 @@ public class BinaryDictEncoderUtils { return flags; } - /* package */ static byte makePtNodeFlags(final PtNode node, final int ptNodeAddress, - final int childrenOffset, final FormatOptions formatOptions) { + /* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset, + final FormatOptions formatOptions) { return (byte) makePtNodeFlags(node.mChars.length > 1, node.mFrequency >= 0, getByteSize(childrenOffset), node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(), diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index a282f595c..e90137674 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -288,6 +288,8 @@ public final class BinaryDictIOUtils { return BinaryDictEncoderUtils.getByteSize(value); } + // TODO: Remove this method. 
+ @Deprecated static void skipPtNode(final DictBuffer dictBuffer, final FormatOptions formatOptions) { final int flags = dictBuffer.readUnsignedByte(); BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions); diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index 3796a466c..e251f7df7 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -391,4 +391,6 @@ public abstract class DictDecoder { return readLength; } } + + public abstract void skipPtNode(final FormatOptions formatOptions); } diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 411e265b3..5c6994119 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -59,7 +59,7 @@ public final class DynamicBinaryDictIOUtils { throws IOException, UnsupportedFormatException { final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); dictBuffer.position(0); - final FileHeader header = dictDecoder.readHeader(); + dictDecoder.readHeader(); final int wordPosition = dictDecoder.getTerminalPosition(word); if (wordPosition == FormatSpec.NOT_VALID_WORD) return; @@ -142,8 +142,7 @@ public final class DynamicBinaryDictIOUtils { final int originalPosition = dictBuffer.position(); dictBuffer.position(ptNodeOriginAddress); final int flags = dictBuffer.readUnsignedByte(); - final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer, - formatOptions); + BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions); BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) 
dictBuffer.readUnsignedByte(); final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java index 96d057a44..7592a0c13 100644 --- a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java @@ -17,6 +17,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.utils.CollectionUtils; import java.io.File; import java.io.FileInputStream; @@ -37,35 +38,39 @@ public class SparseTable { /** * mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize * terminals. - * It contains at index i = j / mBlockSize the index in mContentsTable where the values for - * terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized integer array. + * It contains at index i = j / mBlockSize the index in each ArrayList in mContentsTables where + * the values for terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized + * integer array. */ private final ArrayList<Integer> mLookupTable; - private final ArrayList<Integer> mContentTable; + private final ArrayList<ArrayList<Integer>> mContentTables; private final int mBlockSize; + private final int mContentTableCount; public static final int NOT_EXIST = -1; + public static final int SIZE_OF_INT_IN_BYTES = 4; @UsedForTesting - public SparseTable(final int initialCapacity, final int blockSize) { + public SparseTable(final int initialCapacity, final int blockSize, + final int contentTableCount) { mBlockSize = blockSize; final int lookupTableSize = initialCapacity / mBlockSize + (initialCapacity % mBlockSize > 0 ? 
1 : 0); mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST)); - mContentTable = new ArrayList<Integer>(); + mContentTableCount = contentTableCount; + mContentTables = CollectionUtils.newArrayList(); + for (int i = 0; i < mContentTableCount; ++i) { + mContentTables.add(new ArrayList<Integer>()); + } } @UsedForTesting - public SparseTable(final int[] lookupTable, final int[] contentTable, final int blockSize) { + public SparseTable(final ArrayList<Integer> lookupTable, + final ArrayList<ArrayList<Integer>> contentTables, final int blockSize) { mBlockSize = blockSize; - mLookupTable = new ArrayList<Integer>(lookupTable.length); - for (int i = 0; i < lookupTable.length; ++i) { - mLookupTable.add(lookupTable[i]); - } - mContentTable = new ArrayList<Integer>(contentTable.length); - for (int i = 0; i < contentTable.length; ++i) { - mContentTable.add(contentTable[i]); - } + mContentTableCount = contentTables.size(); + mLookupTable = lookupTable; + mContentTables = contentTables; } /** @@ -75,8 +80,8 @@ public class SparseTable { * Otherwise, IndexOutOfBoundsException will be raised. 
*/ @UsedForTesting - private static void convertByteArrayToIntegerArray(final byte[] byteArray, - final ArrayList<Integer> integerArray) { + private static ArrayList<Integer> convertByteArrayToIntegerArray(final byte[] byteArray) { + final ArrayList<Integer> integerArray = new ArrayList<Integer>(byteArray.length / 4); for (int i = 0; i < byteArray.length; i += 4) { int value = 0; for (int j = i; j < i + 4; ++j) { @@ -85,39 +90,43 @@ public class SparseTable { } integerArray.add(value); } + return integerArray; } @UsedForTesting - public SparseTable(final byte[] lookupTable, final byte[] contentTable, final int blockSize) { - mBlockSize = blockSize; - mLookupTable = new ArrayList<Integer>(lookupTable.length / 4); - mContentTable = new ArrayList<Integer>(contentTable.length / 4); - convertByteArrayToIntegerArray(lookupTable, mLookupTable); - convertByteArrayToIntegerArray(contentTable, mContentTable); + public int get(final int contentTableIndex, final int index) { + if (!contains(index)) { + return NOT_EXIST; + } + return mContentTables.get(contentTableIndex).get( + mLookupTable.get(index / mBlockSize) + (index % mBlockSize)); } @UsedForTesting - public int get(final int index) { - if (index < 0 || index / mBlockSize >= mLookupTable.size() - || mLookupTable.get(index / mBlockSize) == NOT_EXIST) { - return NOT_EXIST; + public ArrayList<Integer> getAll(final int index) { + final ArrayList<Integer> ret = CollectionUtils.newArrayList(); + for (int i = 0; i < mContentTableCount; ++i) { + ret.add(get(i, index)); } - return mContentTable.get(mLookupTable.get(index / mBlockSize) + (index % mBlockSize)); + return ret; } @UsedForTesting - public void set(final int index, final int value) { + public void set(final int contentTableIndex, final int index, final int value) { if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) { - mLookupTable.set(index / mBlockSize, mContentTable.size()); - for (int i = 0; i < mBlockSize; ++i) { - mContentTable.add(NOT_EXIST); + 
mLookupTable.set(index / mBlockSize, mContentTables.get(contentTableIndex).size()); + for (int i = 0; i < mContentTableCount; ++i) { + for (int j = 0; j < mBlockSize; ++j) { + mContentTables.get(i).add(NOT_EXIST); + } } } - mContentTable.set(mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value); + mContentTables.get(contentTableIndex).set( + mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value); } - public void remove(final int index) { - set(index, NOT_EXIST); + public void remove(final int indexOfContent, final int index) { + set(indexOfContent, index, NOT_EXIST); } @UsedForTesting @@ -127,7 +136,8 @@ public class SparseTable { @UsedForTesting /* package */ int getContentTableSize() { - return mContentTable.size(); + // This class always has at least one content table. + return mContentTables.get(0).size(); } @UsedForTesting @@ -136,36 +146,51 @@ public class SparseTable { } public boolean contains(final int index) { - return get(index) != NOT_EXIST; + if (index < 0 || index / mBlockSize >= mLookupTable.size() + || mLookupTable.get(index / mBlockSize) == NOT_EXIST) { + return false; + } + return true; } @UsedForTesting - public void write(final OutputStream lookupOutStream, final OutputStream contentOutStream) + public void write(final OutputStream lookupOutStream, final OutputStream[] contentOutStreams) throws IOException { + if (contentOutStreams.length != mContentTableCount) { + throw new RuntimeException(contentOutStreams.length + " streams are given, but the" + + " table has " + mContentTableCount + " content tables."); + } for (final int index : mLookupTable) { - BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, 4); + BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, SIZE_OF_INT_IN_BYTES); } - for (final int index : mContentTable) { - BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4); + for (int i = 0; i < contentOutStreams.length; ++i) { + for (final int data : 
mContentTables.get(i)) { + BinaryDictEncoderUtils.writeUIntToStream(contentOutStreams[i], data, + SIZE_OF_INT_IN_BYTES); + } } } @UsedForTesting - public void writeToFiles(final File lookupTableFile, final File contentFile) + public void writeToFiles(final File lookupTableFile, final File[] contentFiles) throws IOException { - FileOutputStream lookupTableOutStream = null; - FileOutputStream contentOutStream = null; + FileOutputStream lookupTableOutStream = null; + final FileOutputStream[] contentTableOutStreams = new FileOutputStream[mContentTableCount]; try { lookupTableOutStream = new FileOutputStream(lookupTableFile); - contentOutStream = new FileOutputStream(contentFile); - write(lookupTableOutStream, contentOutStream); + for (int i = 0; i < contentFiles.length; ++i) { + contentTableOutStreams[i] = new FileOutputStream(contentFiles[i]); + } + write(lookupTableOutStream, contentTableOutStreams); } finally { if (lookupTableOutStream != null) { lookupTableOutStream.close(); } - if (contentOutStream != null) { - contentOutStream.close(); + for (int i = 0; i < contentTableOutStreams.length; ++i) { + if (contentTableOutStreams[i] != null) { + contentTableOutStreams[i].close(); + } } } } @@ -185,10 +210,14 @@ public class SparseTable { } @UsedForTesting - public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile, + public static SparseTable readFromFiles(final File lookupTableFile, final File[] contentFiles, final int blockSize) throws IOException { - final byte[] lookupTable = readFileToByteArray(lookupTableFile); - final byte[] content = readFileToByteArray(contentFile); - return new SparseTable(lookupTable, content, blockSize); + final ArrayList<ArrayList<Integer>> contentTables = + new ArrayList<ArrayList<Integer>>(contentFiles.length); + for (int i = 0; i < contentFiles.length; ++i) { + contentTables.add(convertByteArrayToIntegerArray(readFileToByteArray(contentFiles[i]))); + } + return new 
SparseTable(convertByteArrayToIntegerArray(readFileToByteArray(lookupTableFile)), + contentTables, blockSize); } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java index 848277cd4..bf5a28d62 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java @@ -169,7 +169,8 @@ public class Ver3DictDecoder extends DictDecoder { addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, addressPointer); if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - MakedictLog.d("too many bigrams in a PtNode."); + throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() + + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); } } else { bigrams = null; @@ -231,4 +232,40 @@ public class Ver3DictDecoder extends DictDecoder { public boolean hasNextPtNodeArray() { return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS; } + + @Override + public void skipPtNode(final FormatOptions formatOptions) { + final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); + PtNodeReader.readParentAddress(mDictBuffer, formatOptions); + BinaryDictIOUtils.skipString(mDictBuffer, + (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); + PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions); + if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readFrequency(mDictBuffer); + if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) { + final int shortcutsSize = mDictBuffer.readUnsignedShort(); + mDictBuffer.position(mDictBuffer.position() + shortcutsSize + - FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); + } + if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) { + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + final int bigramFlags = mDictBuffer.readUnsignedByte(); + switch (bigramFlags & 
FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: + mDictBuffer.readUnsignedByte(); + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: + mDictBuffer.readUnsignedShort(); + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: + mDictBuffer.readUnsignedInt24(); + break; + } + if ((bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) == 0) break; + } + if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + throw new RuntimeException("Too many bigrams in a PtNode."); + } + } + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java index 76f0f4052..d9e19899c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java @@ -133,12 +133,10 @@ public class Ver3DictEncoder implements DictEncoder { countSize); } - private void writePtNodeFlags(final PtNode ptNode, final int parentAddress, - final FormatOptions formatOptions) { + private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, - BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mPosition, childrenPos, - formatOptions), + BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions), FormatSpec.PTNODE_FLAGS_SIZE); } @@ -244,7 +242,7 @@ public class Ver3DictEncoder implements DictEncoder { @Override public void writePtNode(final PtNode ptNode, final int parentPosition, final FormatOptions formatOptions, final FusionDictionary dict) { - writePtNodeFlags(ptNode, parentPosition, formatOptions); + writePtNodeFlags(ptNode, formatOptions); writeParentPosition(parentPosition, ptNode, formatOptions); writeCharacters(ptNode.mChars, 
ptNode.hasSeveralChars()); writeFrequency(ptNode.mFrequency); diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 0aa431966..624b2784f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -95,7 +95,6 @@ public class Ver4DictDecoder extends DictDecoder { @Override public void openDictBuffer() throws FileNotFoundException, IOException { - final String filename = mDictDirectory.getName(); mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE)); mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( @@ -131,7 +130,7 @@ public class Ver4DictDecoder extends DictDecoder { mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); final File contentFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); - mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, contentFile, + mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { contentFile }, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); } @@ -208,7 +207,7 @@ public class Ver4DictDecoder extends DictDecoder { final ArrayList<PendingAttribute> bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList<PendingAttribute>(); - final int posOfBigrams = mBigramAddressTable.get(terminalId); + final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId); mBigramBuffer.position(posOfBigrams); while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, @@ -224,7 +223,8 @@ public class Ver4DictDecoder extends DictDecoder { if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; 
} if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - MakedictLog.d("too many bigrams in a node."); + throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() + + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); } } else { bigrams = null; @@ -293,4 +293,14 @@ public class Ver4DictDecoder extends DictDecoder { public boolean hasNextPtNodeArray() { return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS; } + + @Override + public void skipPtNode(final FormatOptions formatOptions) { + final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); + PtNodeReader.readParentAddress(mDictBuffer, formatOptions); + BinaryDictIOUtils.skipString(mDictBuffer, + (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); + if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer); + PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions); + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 4c25faf88..a403e25db 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -136,7 +136,7 @@ public class Ver4DictEncoder implements DictEncoder { writeTerminalData(flatNodes, terminalCount); mBigramAddressTable = new SparseTable(terminalCount, - FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 1 /* contentTableCount */); writeBigrams(flatNodes, dict); writeBigramAddressSparseTable(); @@ -181,12 +181,10 @@ public class Ver4DictEncoder implements DictEncoder { countSize); } - private void writePtNodeFlags(final PtNode ptNode, final int parentAddress, - final FormatOptions formatOptions) { + private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) { final int childrenPos = 
BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, - BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mTriePos, childrenPos, - formatOptions), + BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions), FormatSpec.PTNODE_FLAGS_SIZE); } @@ -231,8 +229,7 @@ public class Ver4DictEncoder implements DictEncoder { while (shortcutIterator.hasNext()) { final WeightedString target = shortcutIterator.next(); final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( - shortcutIterator.hasNext(), - target.mFrequency); + shortcutIterator.hasNext(), target.mFrequency); mTrieBuf[mTriePos++] = (byte)shortcutFlags; final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos, target.mWord); @@ -254,7 +251,8 @@ public class Ver4DictEncoder implements DictEncoder { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { final int startPos = bigramBuffer.size(); - mBigramAddressTable.set(ptNode.mTerminalId, startPos); + mBigramAddressTable.set(0 /* contentTableIndex */, ptNode.mTerminalId, + startPos); final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator(); while (bigramIterator.hasNext()) { final WeightedString bigram = bigramIterator.next(); @@ -280,7 +278,7 @@ public class Ver4DictEncoder implements DictEncoder { new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); final File contentFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); - mBigramAddressTable.writeToFiles(lookupIndexFile, contentFile); + mBigramAddressTable.writeToFiles(lookupIndexFile, new File[] { contentFile }); } @Override @@ -292,7 +290,7 @@ public class Ver4DictEncoder implements DictEncoder { @Override public void writePtNode(final PtNode ptNode, final int parentPosition, final FormatOptions formatOptions, final FusionDictionary dict) { - writePtNodeFlags(ptNode, parentPosition, 
formatOptions); + writePtNodeFlags(ptNode, formatOptions); writeParentPosition(parentPosition, ptNode, formatOptions); writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); if (ptNode.isTerminal()) { diff --git a/java/src/com/android/inputmethod/latin/settings/DebugSettingsActivity.java b/java/src/com/android/inputmethod/latin/settings/DebugSettingsActivity.java index b499c26b6..ef6ab2a38 100644 --- a/java/src/com/android/inputmethod/latin/settings/DebugSettingsActivity.java +++ b/java/src/com/android/inputmethod/latin/settings/DebugSettingsActivity.java @@ -38,4 +38,10 @@ public final class DebugSettingsActivity extends PreferenceActivity { super.onCreate(savedInstanceState); setTitle(R.string.english_ime_debug_settings); } + + // TODO: Uncomment the override annotation once we start using SDK version 19. + // @Override + public boolean isValidFragment(String fragmentName) { + return fragmentName.equals(DEFAULT_FRAGMENT); + } } diff --git a/java/src/com/android/inputmethod/latin/settings/SettingsActivity.java b/java/src/com/android/inputmethod/latin/settings/SettingsActivity.java index 6c3818651..ad68f8c37 100644 --- a/java/src/com/android/inputmethod/latin/settings/SettingsActivity.java +++ b/java/src/com/android/inputmethod/latin/settings/SettingsActivity.java @@ -32,4 +32,10 @@ public final class SettingsActivity extends PreferenceActivity { intent.putExtra(EXTRA_NO_HEADERS, true); return intent; } + + // TODO: Uncomment the override annotation once we start using SDK version 19. 
+ // @Override + public boolean isValidFragment(String fragmentName) { + return fragmentName.equals(DEFAULT_FRAGMENT); + } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerSettingsActivity.java b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerSettingsActivity.java index 119ca4755..aba563746 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerSettingsActivity.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/SpellCheckerSettingsActivity.java @@ -24,6 +24,8 @@ import android.preference.PreferenceActivity; * Spell checker preference screen. */ public final class SpellCheckerSettingsActivity extends PreferenceActivity { + private static final String DEFAULT_FRAGMENT = SpellCheckerSettingsFragment.class.getName(); + @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); @@ -32,8 +34,14 @@ public final class SpellCheckerSettingsActivity extends PreferenceActivity { @Override public Intent getIntent() { final Intent modIntent = new Intent(super.getIntent()); - modIntent.putExtra(EXTRA_SHOW_FRAGMENT, SpellCheckerSettingsFragment.class.getName()); + modIntent.putExtra(EXTRA_SHOW_FRAGMENT, DEFAULT_FRAGMENT); modIntent.putExtra(EXTRA_NO_HEADERS, true); return modIntent; } + + // TODO: Uncomment the override annotation once we start using SDK version 19. 
+ // @Override + public boolean isValidFragment(String fragmentName) { + return fragmentName.equals(DEFAULT_FRAGMENT); + } } diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 36afea54b..ca6a77997 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -85,8 +85,8 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ - decaying_utils.cpp \ dict_file_writing_utils.cpp \ + forgetting_curve_utils.cpp \ format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 21c15ea67..38159b0f3 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -344,8 +344,7 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, static const int GET_PROPERTY_RESULT_LENGTH = 100; char resultChars[GET_PROPERTY_RESULT_LENGTH]; resultChars[0] = '\0'; - dictionary->getDictionaryStructurePolicy()->getProperty(queryChars, resultChars, - GET_PROPERTY_RESULT_LENGTH); + dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH); return env->NewStringUTF(resultChars); } diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index c920f64b4..742e388e4 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -298,9 +298,19 @@ static inline void prof_out(void) { #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define NOT_A_DICT_POS (S_INT_MIN) + // A special value to mean the first word confidence makes no sense in this case, // e.g. this is not a multi-word suggestion. 
-#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN) +#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MAX) +// How high the confidence needs to be for us to auto-commit. Arbitrary. +// This needs to be the same as CONFIDENCE_FOR_AUTO_COMMIT in BinaryDictionary.java +#define CONFIDENCE_FOR_AUTO_COMMIT (1000000) +// 80% of the full confidence +#define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) +// 100% of the full confidence +#define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT) +// 80% of the full confidence +#define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) #define KEYCODE_SPACE ' ' #define KEYCODE_SINGLE_QUOTE '\'' diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 9099e8285..49cfdecac 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -271,7 +271,7 @@ class DicNode { return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; } - bool shouldBeFilterdBySafetyNetForBigram() const { + bool shouldBeFilteredBySafetyNetForBigram() const { const uint16_t currentDepth = getNodeCodePointCount(); const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1; @@ -321,6 +321,16 @@ class DicNode { DUMP_WORD_AND_SCORE("OUTPUT"); } + // "Total" in this context (and other methods in this class) means the whole suggestion. When + // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only + // the one that corresponds to the last word of the suggestion, and all the previous words + // are concatenated together in mPrevWord - which contains a space at the end. 
+ int getTotalNodeSpaceCount() const { + if (isFirstWord()) return 0; + return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()); + } + int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); if (inputIndex == NOT_AN_INDEX) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 5969b31cc..59ead1894 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -129,7 +129,7 @@ bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { } void Dictionary::getProperty(const char *const query, char *const outResult, - const int maxResultLength) const { + const int maxResultLength) { return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); } diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 43d3b964d..0195d5bf0 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -84,7 +84,7 @@ class Dictionary { bool needsToRunGC(const bool mindsBlockByGC); void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const; + const int maxResultLength); const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy; diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.cpp b/native/jni/src/suggest/core/layout/proximity_info_params.cpp index 0e887f700..49df10301 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_params.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_params.cpp @@ -69,13 +69,13 @@ const float 
ProximityInfoParams::STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180 const float ProximityInfoParams::SKIP_CORNER_PROBABILITY = 0.4f; const float ProximityInfoParams::SPEED_MARGIN = 0.1f; const float ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f; -// TODO: The variance is critical for accuracy; thus, adjusting these parameter by machine +// TODO: The variance is critical for accuracy; thus, adjusting these parameters by machine // learning or something would be efficient. -const float ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION = 0.3f; -const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION = 0.25f; -const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION = 0.5f; -const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION = 0.15f; -const float ProximityInfoParams::MIN_STANDERD_DIVIATION = 0.37f; +const float ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION = 0.3f; +const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION = 0.25f; +const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION = 0.5f; +const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION = 0.15f; +const float ProximityInfoParams::MIN_STANDARD_DEVIATION = 0.37f; const float ProximityInfoParams::PREV_DISTANCE_WEIGHT = 0.5f; const float ProximityInfoParams::NEXT_DISTANCE_WEIGHT = 0.6f; diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.h b/native/jni/src/suggest/core/layout/proximity_info_params.h index 4e47f7308..ae1f82c22 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_params.h +++ b/native/jni/src/suggest/core/layout/proximity_info_params.h @@ -73,11 +73,11 @@ class ProximityInfoParams { static const float SKIP_CORNER_PROBABILITY; static const float SPEED_MARGIN; static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION; - static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION; - 
static const float MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION; - static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION; - static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION; - static const float MIN_STANDERD_DIVIATION; + static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION; + static const float MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION; + static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION; + static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION; + static const float MIN_STANDARD_DEVIATION; static const float PREV_DISTANCE_WEIGHT; static const float NEXT_DISTANCE_WEIGHT; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp index 904671f7f..e1b35340b 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp @@ -708,13 +708,13 @@ namespace latinime { const float inputCharProbability = 1.0f - skipProbability; const float speedxAngleRate = min(speedRate * currentAngle / M_PI_F - * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION, - ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION); + * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION, + ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION); const float speedxNearestKeyDistanceRate = min(speedRate * nearestKeyDistance - * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION, - ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION); + * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION, + ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION); const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate - + ProximityInfoParams::MIN_STANDERD_DIVIATION; + + ProximityInfoParams::MIN_STANDARD_DEVIATION; ProximityInfoUtils::NormalDistribution 
distribution(ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index c7ffef0d5..41f82049f 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -80,8 +80,10 @@ class DictionaryStructureWithBufferPolicy { virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0; + // Currently, this method is used only for testing. You may want to consider creating new + // dedicated method instead of this if you want to use this in the production. virtual void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const = 0; + const int maxResultLength) = 0; protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 51cfba17a..73ccebc88 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -166,7 +166,11 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // TODO: have partial commit work even with multiple pointers. 
const bool outputSecondWordFirstLetterInputIndex = traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); - outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(); + if (terminalSize > 0) { + // If we have no suggestions, don't write this + outputAutoCommitFirstWordConfidence[0] = + computeFirstWordConfidence(&terminals[0]); + } // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; @@ -255,9 +259,55 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen return outputWordIndex; } -int Suggest::computeFirstWordConfidence() const { - // TODO: implement this. - return NOT_A_FIRST_WORD_CONFIDENCE; +int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const { + // Get the number of spaces in the first suggestion + const int spaceCount = terminalDicNode->getTotalNodeSpaceCount(); + // Get the number of characters in the first suggestion + const int length = terminalDicNode->getTotalNodeCodePointCount(); + // Get the distance for the first word of the suggestion + const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord(); + + // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000. + // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or + // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means + // we are very confident. 
+ // Expected space count is 1 ~ 5 + static const int MIN_EXPECTED_SPACE_COUNT = 1; + static const int MAX_EXPECTED_SPACE_COUNT = 5; + // Expected length is about 4 ~ 30 + static const int MIN_EXPECTED_LENGTH = 4; + static const int MAX_EXPECTED_LENGTH = 30; + // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0 + static const float MIN_EXPECTED_DISTANCE = 0.0; + static const float MAX_EXPECTED_DISTANCE = 2.0; + // This is not strict: it's where most stuff will be falling, but it's still fine if it's + // outside these values. We want to output a value that reflects all of these. Each factor + // contributes a bit. + + // We need at least a space. + if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE; + + // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0 + // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the + // weight of the distance. Clamp to avoid overflows. + const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE + : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance; + const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT + * (MAX_EXPECTED_DISTANCE - clampedDistance) + / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE); + // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no + // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the + // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp. + const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT + * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH); + // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no + // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the + // weight of the space count. 
+ const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT + * (spaceCount - MIN_EXPECTED_SPACE_COUNT) + / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT); + + return distanceContribution + lengthContribution + spaceContribution; } /** @@ -395,7 +445,7 @@ void Suggest::processTerminalDicNode( if (!dicNode->isTerminalWordNode()) { return; } - if (dicNode->shouldBeFilterdBySafetyNetForBigram()) { + if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { return; } // Create a non-cached node here. diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 0e8bd1195..b20343d29 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -58,7 +58,7 @@ class Suggest : public SuggestInterface { int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const; - int computeFirstWordConfidence() const; + int computeFirstWordConfidence(const DicNode *const terminalDicNode) const; void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 67a085de3..8753c6eb0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -20,7 +20,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 
-#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const } *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); - if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) { + if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) { // This bigram is too weak to output. *outBigramPos = NOT_A_DICT_POS; } else { @@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( bigramFlags); const int probabilityToWrite = mIsDecayingDict ? - DecayingUtils::getUpdatedBigramProbabilityDelta( - originalProbability, probability) : probability; + ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + probability) : probability; const BigramListReadWriteUtils::BigramFlags updatedFlags = BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probabilityToWrite); @@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con int *const writingPos) { // hasNext is false because we are adding a new bigram entry at the end of the bigram list. const int probabilityToWrite = mIsDecayingDict ? - DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : + ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) : probability; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, probabilityToWrite, false /* hasNext */, writingPos); @@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay( *outRemoved = false; if (mIsDecayingDict) { // Update bigram probability for decaying. 
- const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave( + const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); - if (DecayingUtils::isValidBigram(newProbability)) { + if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { // Write new probability. const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = BigramListReadWriteUtils::setProbabilityInFlags( diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index 081163a4d..324b53062 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners bool isUselessPtNode = !node->isTerminal(); if (node->isTerminal() && mIsDecayingDict) { const int newProbability = - DecayingUtils::getUnigramProbabilityToSave(node->getProbability()); + ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability()); int writingPos = node->getProbabilityFieldPos(); // Update probability. 
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( mBuffer, newProbability, &writingPos)) { return false; } - if (!DecayingUtils::isValidUnigram(newProbability)) { + if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { isUselessPtNode = false; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 0d8c92768..60d0db0c0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -28,17 +28,21 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { +// Note that there are corresponding definitions on the Java side in BinaryDictionaryTests and +// BinaryDictionaryDecayingTests. 
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY = + "SET_NEEDS_TO_DECAY_FOR_TESTING"; const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; -const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60; +const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -150,7 +154,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { if (mHeaderPolicy.isDecayingDict()) { - return DecayingUtils::getProbability(unigramProbability, bigramProbability); + return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); } else { if (unigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; @@ -301,7 +305,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { return; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } @@ -310,9 +314,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } + const bool runGCwithDecay = needsToDecay(); + DynamicBigramListPolicy 
bigramListPolicyForGC(&mBufferWithExtendableBuffer, + &mShortcutListPolicy, runGCwithDecay); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + &bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); + if (runGCwithDecay) { + mNeedsToDecayForTesting = false; + } } bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { @@ -334,14 +344,13 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { // Needs to reduce dictionary size. return true; } else if (mHeaderPolicy.isDecayingDict()) { - if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) { + if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { // Unigram count exceeds the limit. return true; - } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) { + } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) { // Bigram count exceeds the limit. return true; - } else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime() - + MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) { + } else if (mindsBlockByGC && needsToDecay()) { // Time to update probabilities for decaying. 
return true; } @@ -350,12 +359,19 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { } void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, - const int maxResultLength) const { + const int maxResultLength) { if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { snprintf(outResult, maxResultLength, "%d", mUnigramCount); } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { snprintf(outResult, maxResultLength, "%d", mBigramCount); + } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) { + mNeedsToDecayForTesting = true; } } +bool DynamicPatriciaTriePolicy::needsToDecay() const { + return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting + || mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0)); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index d3150c6fc..c3bbe9977 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -40,7 +40,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), - mBigramCount(mHeaderPolicy.getBigramCount()) {} + mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -95,16 +95,17 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool needsToRunGC(const bool mindsBlockByGC) const; void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const; + const int maxResultLength); private: 
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); - static const char*const UNIGRAM_COUNT_QUERY; - static const char*const BIGRAM_COUNT_QUERY; + static const char *const UNIGRAM_COUNT_QUERY; + static const char *const BIGRAM_COUNT_QUERY; + static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY; static const int MAX_DICT_EXTENDED_REGION_SIZE; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; - static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING; + static const int DECAY_INTERVAL_FOR_DECAYING_DICTS; const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; @@ -113,6 +114,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicBigramListPolicy mBigramListPolicy; int mUnigramCount; int mBigramCount; + int mNeedsToDecayForTesting; + + bool needsToDecay() const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 28124d251..70a9ee564 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -25,8 +25,8 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "utils/hash_map_compat.h" namespace latinime { @@ -153,7 +153,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + 
mBuffer->getUsedAdditionalBufferSize(); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, - unigramCount, bigramCount, extendedRegionSize)) { + false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) { return; } DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); @@ -170,7 +170,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod } BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, - unigramCount, bigramCount, 0 /* extendedRegionSize */)) { + mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; } DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); @@ -488,13 +488,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - this, mBuffer, mIsDecayingDict); + this, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } - if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { + if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { // TODO: Remove more unigrams. 
} @@ -506,8 +506,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } - if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() - > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) { + if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() + > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { // TODO: Remove more bigrams. } @@ -525,7 +525,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instance for the GCed dictionary. DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy, - mIsDecayingDict); + mNeedsToDecay); // Create reading helper for the GCed dictionary. DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); @@ -544,8 +544,9 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, const int newProbability) { - if (mIsDecayingDict) { - return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability); + if (mNeedsToDecay) { + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); } else { return newProbability; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index ecee2cdbf..0caf29120 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -51,9 +51,9 @@ class DynamicPatriciaTrieWritingHelper { DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy *const bigramPolicy, - 
DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict) + DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), - mIsDecayingDict(isDecayingDict) {} + mNeedsToDecay(needsToDecay) {} ~DynamicPatriciaTrieWritingHelper() {} @@ -94,7 +94,7 @@ class DynamicPatriciaTrieWritingHelper { BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; - const bool mIsDecayingDict; + const bool mNeedsToDecay; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos, const int bigramLinkedNodePos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 9ce9994dd..eb072fbaf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -23,6 +23,7 @@ const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WOR // TODO: Change attribute string to "IS_DECAYING_DICT". 
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; +const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; @@ -63,8 +64,8 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const { } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount, - const int extendedRegionSize) const { + const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime, + const int unigramCount, const int bigramCount, const int extendedRegionSize) const { int writingPos = 0; if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion, &writingPos)) { @@ -90,6 +91,11 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY, time(0)); } + if (updatesLastDecayedTime) { + // Set current time as a last decayed time. 
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY, + time(0)); + } if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 4261667fa..a9c7805a8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -40,6 +40,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, @@ -58,6 +60,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} ~HeaderPolicy() {} @@ -90,6 +94,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mLastUpdatedTime; } + AK_FORCE_INLINE int getLastDecayedTime() const { + return mLastDecayedTime; + } + AK_FORCE_INLINE int getUnigramCount() const { return mUnigramCount; } @@ -106,8 +114,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { int *outValue, int outValueSize) const; bool 
writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime, const int unigramCount, - const int bigramCount, const int extendedRegionSize) const; + const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime, + const int unigramCount, const int bigramCount, const int extendedRegionSize) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); @@ -115,6 +123,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const IS_DECAYING_DICT_KEY; static const char *const LAST_UPDATED_TIME_KEY; + static const char *const LAST_DECAYED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; static const char *const EXTENDED_REGION_SIZE_KEY; @@ -128,6 +137,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const float mMultiWordCostMultiplier; const bool mIsDecayingDict; const int mLastUpdatedTime; + const int mLastDecayedTime; const int mUnigramCount; const int mBigramCount; const int mExtendedRegionSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 8d88c68e8..0f8662aea 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -114,7 +114,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const { + const int maxResultLength) { // getProperty is not supported for this class. 
if (maxResultLength > 0) { outResult[0] = '\0'; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp deleted file mode 100644 index 942a74238..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" - -#include "suggest/policyimpl/dictionary/utils/probability_utils.h" - -namespace latinime { - -const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000; -const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; -const int DecayingUtils::MAX_BIGRAM_COUNT = 12000; -const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; - -const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127; -const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120; -const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; -const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8; -const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; -const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; -const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; - -/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability, - const int encodedBigramProbabilityDelta) { - if (encodedUnigramProbability == NOT_A_PROBABILITY) { - 
return NOT_A_PROBABILITY; - } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) { - const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability( - encodedUnigramProbability)); - return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); - } else { - const int rawProbability = ProbabilityUtils::computeProbabilityForBigram( - decodeUnigramProbability(encodedUnigramProbability), - decodeBigramProbabilityDelta(encodedBigramProbabilityDelta)); - return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); - } -} - -/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability, - const int newProbability) { - if (originalEncodedProbability == NOT_A_PROBABILITY) { - // The unigram is not in this dictionary. - if (newProbability == NOT_A_PROBABILITY) { - // The unigram is not in other dictionaries. - return 0; - } else { - return MIN_VALID_UNIGRAM_PROBABILITY; - } - } else { - if (newProbability != NOT_A_PROBABILITY - && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) { - return MIN_VALID_UNIGRAM_PROBABILITY; - } - return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY); - } -} - -/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) { - return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); -} - -/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) { - return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); -} - -/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta( - const int originalEncodedProbabilityDelta, const int newProbability) { - if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { - // The bigram relation is not in this dictionary. - if (newProbability == NOT_A_PROBABILITY) { - // The bigram target is not in other dictionaries. 
- return 0; - } else { - return MIN_VALID_BIGRAM_PROBABILITY_DELTA; - } - } else { - if (newProbability != NOT_A_PROBABILITY - && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) { - return MIN_VALID_BIGRAM_PROBABILITY_DELTA; - } - return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP, - MAX_BIGRAM_PROBABILITY_DELTA); - } -} - -/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) { - return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; -} - -/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) { - return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; -} - -/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) { - const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; - if (probability < 0) { - return NOT_A_PROBABILITY; - } else { - return min(probability, MAX_UNIGRAM_PROBABILITY); - } -} - -/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) { - const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; - if (probabilityDelta < 0) { - return NOT_A_PROBABILITY; - } else { - return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA); - } -} - -/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) { - return rawProbability; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index f22e94c6a..994826fa8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -44,7 +44,8 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = BufferWithExtendableBuffer headerBuffer(0 /* 
originalBuffer */, 0 /* originalBufferSize */); HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, - 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); + true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */, + 0 /* extendedRegionSize */); BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp new file mode 100644 index 000000000..4ff31ba0a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cmath> +#include <stdlib.h> + +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + +namespace latinime { + +const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000; +const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; +const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; +const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; + +const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; +const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15; +const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3; +const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1; +// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected +// duration of the decay is approximately 66hours. +const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; + +const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; + +/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, + const int encodedBigramProbability) { + if (encodedUnigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (encodedBigramProbability == NOT_A_PROBABILITY) { + return backoff(decodeProbability(encodedUnigramProbability)); + } else { + const int unigramProbability = decodeProbability(encodedUnigramProbability); + const int bigramProbability = decodeProbability(encodedBigramProbability); + return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); + } +} + +// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding +// (i.e. unigram probability + bigram probability delta). 
+/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability( + const int originalEncodedProbability, const int newProbability) { + if (originalEncodedProbability == NOT_A_PROBABILITY) { + // The bigram relation is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The bigram target is not in other dictionaries. + return 0; + } else { + return MIN_VALID_ENCODED_PROBABILITY; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) { + return MIN_VALID_ENCODED_PROBABILITY; + } + return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY); + } +} + +/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) { + return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; +} + +/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) { + const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); + // TODO: Implement the decay in more proper way. 
+ const float currentRate = static_cast<float>(currentEncodedProbability) + / static_cast<float>(MAX_ENCODED_PROBABILITY); + const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY + + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate); + const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); + if (thresholdToDecay < randValue) { + return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0); + } else { + return currentEncodedProbability; + } +} + +/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) { + const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY; + if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) { + return NOT_A_PROBABILITY; + } else { + return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY); + } +} + +// See comments in ProbabilityUtils::backoff(). +/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else { + return max(unigramProbability - 8, 0); + } +} + +ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() { + // Table entry is as follows: + // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127. + // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used. 
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1); + for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) { + const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY), + static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY))); + mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability)); + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 1ca03918f..d666f22aa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -14,8 +14,10 @@ * limitations under the License. */ -#ifndef LATINIME_DECAYING_UTILS_H -#define LATINIME_DECAYING_UTILS_H +#ifndef LATINIME_FORGETTING_CURVE_UTILS_H +#define LATINIME_FORGETTING_CURVE_UTILS_H + +#include <vector> #include "defines.h" @@ -24,8 +26,7 @@ namespace latinime { // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is // required to introduced to each terminal PtNode and bigram entry. // TODO: Quit using bigram probability to indicate the delta. -// TODO: Quit using bigram probability delta. 
-class DecayingUtils { +class ForgettingCurveUtils { public: static const int MAX_UNIGRAM_COUNT; static const int MAX_UNIGRAM_COUNT_AFTER_GC; @@ -33,38 +34,46 @@ class DecayingUtils { static const int MAX_BIGRAM_COUNT_AFTER_GC; static int getProbability(const int encodedUnigramProbability, - const int encodedBigramProbabilityDelta); + const int encodedBigramProbability); - static int getUpdatedUnigramProbability(const int originalEncodedProbability, + static int getUpdatedEncodedProbability(const int originalEncodedProbability, const int newProbability); - static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta, - const int newProbability); + static int isValidEncodedProbability(const int encodedProbability); - static int isValidUnigram(const int encodedUnigramProbability); + static int getEncodedProbabilityToSave(const int encodedProbability); - static int isValidBigram(const int encodedProbabilityDelta); + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); - static int getUnigramProbabilityToSave(const int encodedProbability); + class ProbabilityTable { + public: + ProbabilityTable(); - static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); + int getProbability(const int encodedProbability) const { + if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) { + return NOT_A_PROBABILITY; + } + return mTable[encodedProbability]; + } - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils); + private: + DISALLOW_COPY_AND_ASSIGN(ProbabilityTable); + + std::vector<int> mTable; + }; static const int MAX_COMPUTED_PROBABILITY; - static const int MAX_UNIGRAM_PROBABILITY; - static const int MIN_VALID_UNIGRAM_PROBABILITY; - static const int UNIGRAM_PROBABILITY_STEP; - static const int MAX_BIGRAM_PROBABILITY_DELTA; - static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA; - static const int BIGRAM_PROBABILITY_DELTA_STEP; + static const int MAX_ENCODED_PROBABILITY; + static const int 
MIN_VALID_ENCODED_PROBABILITY; + static const int ENCODED_PROBABILITY_STEP; + static const float MIN_PROBABILITY_TO_DECAY; - static int decodeUnigramProbability(const int encodedProbability); + static const ProbabilityTable sProbabilityTable; - static int decodeBigramProbabilityDelta(const int encodedProbability); + static int decodeProbability(const int encodedProbability); - static int getDecayedProbability(const int rawProbability); + static int backoff(const int unigramProbability); }; } // namespace latinime -#endif /* LATINIME_DECAYING_UTILS_H */ +#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 89e53f441..007c19e0a 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -101,7 +101,7 @@ class TypingTraversal : public Traversal { } const int16_t pointIndex = dicNode->getInputIndex(0); return pointIndex <= inputSize && !dicNode->isTotalInputSizeExceedingLimit() - && !dicNode->shouldBeFilterdBySafetyNetForBigram(); + && !dicNode->shouldBeFilteredBySafetyNetForBigram(); } AK_FORCE_INLINE bool shouldDepthLevelCache( diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 2e735a81c..41663c81a 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -75,6 +75,16 @@ class CharUtils { return c; } + static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, const int length) { + int spaceCount = 0; + for (int i = 0; i < length; ++i) { + if (codePointBuffer[i] == KEYCODE_SPACE) { + ++spaceCount; + } + } + return spaceCount; + } + static unsigned short latin_tolower(const unsigned short c); private: diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index 
cf85d97a0..b2d31c21f 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -32,6 +32,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; + // Note that these are corresponding definitions in native code in + // latinime::DynamicPatriciaTriePolicy. + private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY = + "SET_NEEDS_TO_DECAY_FOR_TESTING"; + private static final int DUMMY_PROBABILITY = 0; @Override @@ -45,14 +50,20 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { - binaryDictionary.flushWithGC(); + // Entries having low probability would be suppressed once in 2 GCs. + final int count = 2; + for (int i = 0; i < count; i++) { + binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); + binaryDictionary.flushWithGC(); + } } private void forcePassingLongTime(final BinaryDictionary binaryDictionary) { // Currently, probabilities are decayed when GC is run. All entries that have never been - // typed in 32 GCs are removed. - final int count = 32; + // typed in 128 GCs would be removed. 
+ final int count = 128; for (int i = 0; i < count; i++) { + binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); binaryDictionary.flushWithGC(); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index a4d94262f..0cf4ef9f1 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -104,7 +104,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } sStarBigrams.put(0, new ArrayList<Integer>()); - for (int i = 1; i < sWords.size(); ++i) { + // MAX - 1 because we added one above already + final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); + for (int i = 1; i < maxBigrams; ++i) { sStarBigrams.get(0).add(i); } @@ -544,8 +546,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, - int index, boolean contained) { - final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; + final boolean contained) { long diff = -1; int position = -1; try { @@ -603,7 +604,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Test a word that is contained within the dictionary. long sum = 0; for (int i = 0; i < sWords.size(); ++i) { - final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), i, true); + final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true); sum += time == -1 ? 
0 : time; } Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message @@ -616,11 +617,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { for (int i = 0; i < 1000; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); if (sWords.indexOf(word) != -1) continue; - checkGetTerminalPosition(dictDecoder, word, i, false); + checkGetTerminalPosition(dictDecoder, word, false); } } - private void runGetTerminalPositionTests(final ArrayList<String> results, final int bufferType, + private void runGetTerminalPositionTests(final int bufferType, final FormatOptions formatOptions) { runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); } @@ -628,17 +629,17 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public void testGetTerminalPosition() { final ArrayList<String> results = CollectionUtils.newArrayList(); - runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION2); - runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); - runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); - - runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION2); - runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); - runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION2); + runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + 
runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + + runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION2); + runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); for (final String result : results) { Log.d(TAG, result); diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java index 132483d5e..aeb8552bd 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java @@ -21,10 +21,8 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; import java.io.File; -import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Random; @@ -36,9 +34,6 @@ import java.util.Random; public class SparseTableTests extends AndroidTestCase { private static final String TAG = SparseTableTests.class.getSimpleName(); - private static final int[] SMALL_INDEX = { SparseTable.NOT_EXIST, 0 }; - private static final int[] BIG_INDEX = { SparseTable.NOT_EXIST, 1, 2, 3, 4, 5, 6, 7}; - private final Random mRandom; private final ArrayList<Integer> mRandomIndex; @@ -59,32 +54,21 @@ public class SparseTableTests extends AndroidTestCase { } } - public void testInitializeWithArray() { - final SparseTable table = new SparseTable(SMALL_INDEX, BIG_INDEX, BLOCK_SIZE); - for (int i = 0; i < 8; ++i) { - assertEquals(SparseTable.NOT_EXIST, table.get(i)); - } - 
assertEquals(SparseTable.NOT_EXIST, table.get(8)); - for (int i = 9; i < 16; ++i) { - assertEquals(i - 8, table.get(i)); - } - } - public void testSet() { - final SparseTable table = new SparseTable(16, BLOCK_SIZE); - table.set(3, 6); - table.set(8, 16); + final SparseTable table = new SparseTable(16, BLOCK_SIZE, 1); + table.set(0, 3, 6); + table.set(0, 8, 16); for (int i = 0; i < 16; ++i) { if (i == 3 || i == 8) { - assertEquals(i * 2, table.get(i)); + assertEquals(i * 2, table.get(0, i)); } else { - assertEquals(SparseTable.NOT_EXIST, table.get(i)); + assertEquals(SparseTable.NOT_EXIST, table.get(0, i)); } } } private void generateRandomIndex(final int size, final int prop) { - for (int i = 0; i < DEFAULT_SIZE; ++i) { + for (int i = 0; i < size; ++i) { if (mRandom.nextInt(100) < prop) { mRandomIndex.set(i, mRandom.nextInt()); } else { @@ -94,11 +78,11 @@ public class SparseTableTests extends AndroidTestCase { } private void runTestRandomSet() { - final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE); + final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE, 1); int elementCount = 0; for (int i = 0; i < DEFAULT_SIZE; ++i) { if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) { - table.set(i, mRandomIndex.get(i)); + table.set(0, i, mRandomIndex.get(i)); elementCount++; } } @@ -107,29 +91,24 @@ public class SparseTableTests extends AndroidTestCase { + table.getContentTableSize()); Log.d(TAG, "the table has " + elementCount + " elements"); for (int i = 0; i < DEFAULT_SIZE; ++i) { - assertEquals(table.get(i), (int)mRandomIndex.get(i)); + assertEquals(table.get(0, i), (int)mRandomIndex.get(i)); } // flush and reload OutputStream lookupOutStream = null; OutputStream contentOutStream = null; - InputStream lookupInStream = null; - InputStream contentInStream = null; try { final File lookupIndexFile = File.createTempFile("testRandomSet", ".small"); final File contentFile = File.createTempFile("testRandomSet", ".big"); lookupOutStream = new 
FileOutputStream(lookupIndexFile); contentOutStream = new FileOutputStream(contentFile); - table.write(lookupOutStream, contentOutStream); - lookupInStream = new FileInputStream(lookupIndexFile); - contentInStream = new FileInputStream(contentFile); - final byte[] lookupArray = new byte[(int) lookupIndexFile.length()]; - final byte[] contentArray = new byte[(int) contentFile.length()]; - lookupInStream.read(lookupArray); - contentInStream.read(contentArray); - final SparseTable newTable = new SparseTable(lookupArray, contentArray, BLOCK_SIZE); + table.write(lookupOutStream, new OutputStream[] { contentOutStream }); + lookupOutStream.flush(); + contentOutStream.flush(); + final SparseTable newTable = SparseTable.readFromFiles(lookupIndexFile, + new File[] { contentFile }, BLOCK_SIZE); for (int i = 0; i < DEFAULT_SIZE; ++i) { - assertEquals(table.get(i), newTable.get(i)); + assertEquals(table.get(0, i), newTable.get(0, i)); } } catch (IOException e) { Log.d(TAG, "IOException while flushing and realoding", e); @@ -157,4 +136,60 @@ public class SparseTableTests extends AndroidTestCase { runTestRandomSet(); } } + + public void testMultipleContents() { + final int numOfContents = 5; + generateRandomIndex(DEFAULT_SIZE, 20); + final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE, numOfContents); + for (int i = 0; i < mRandomIndex.size(); ++i) { + if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) { + for (int j = 0; j < numOfContents; ++j) { + table.set(j, i, mRandomIndex.get(i)); + } + } + } + + OutputStream lookupOutStream = null; + OutputStream[] contentsOutStream = new OutputStream[numOfContents]; + try { + final File lookupIndexFile = File.createTempFile("testMultipleContents", "small"); + lookupOutStream = new FileOutputStream(lookupIndexFile); + final File[] contentFiles = new File[numOfContents]; + for (int i = 0; i < numOfContents; ++i) { + contentFiles[i] = File.createTempFile("testMultipleContents", "big" + i); + contentsOutStream[i] = new 
FileOutputStream(contentFiles[i]); + } + table.write(lookupOutStream, contentsOutStream); + lookupOutStream.flush(); + for (int i = 0; i < numOfContents; ++i) { + contentsOutStream[i].flush(); + } + final SparseTable newTable = SparseTable.readFromFiles(lookupIndexFile, contentFiles, + BLOCK_SIZE); + for (int i = 0; i < numOfContents; ++i) { + for (int j = 0; j < DEFAULT_SIZE; ++j) { + assertEquals(table.get(i, j), newTable.get(i, j)); + } + } + } catch (IOException e) { + Log.d(TAG, "IOException while flushing and reloading", e); + } finally { + if (lookupOutStream != null) { + try { + lookupOutStream.close(); + } catch (IOException e) { + Log.d(TAG, "IOException while closing the stream", e); + } + } + for (int i = 0; i < numOfContents; ++i) { + if (contentsOutStream[i] != null) { + try { + contentsOutStream[i].close(); + } catch (IOException e) { + Log.d(TAG, "IOException while closing the stream.", e); + } + } + } + } + } } diff --git a/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml index 9728c9963..4cf742441 100644 --- a/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml @@ -72,7 +72,7 @@ <string name="more_keys_for_l">l·l,ł</string> <!-- U+00B7: "·" MIDDLE DOT --> <string name="more_keys_for_punctuation">"!fixedColumnOrder!4,·,!,\\,,\?,:,;,\@"</string> - <string name="more_keys_for_tablet_period">\?,·</string> + <string name="more_keys_for_period">\?,·</string> <!-- U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA --> <string name="keylabel_for_spanish_row2_10">ç</string> </resources> diff --git a/tools/make-keyboard-text/res/values-es/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-es/donottranslate-more-keys.xml index 849429629..8e6b4ee06 100644 --- a/tools/make-keyboard-text/res/values-es/donottranslate-more-keys.xml +++ 
b/tools/make-keyboard-text/res/values-es/donottranslate-more-keys.xml @@ -75,7 +75,7 @@ <!-- U+00A1: "¡" INVERTED EXCLAMATION MARK --> <string name="more_keys_for_tablet_comma">"!,¡"</string> <!-- U+00BF: "¿" INVERTED QUESTION MARK --> - <string name="more_keys_for_tablet_period">"\?,¿"</string> + <string name="more_keys_for_period">"\?,¿"</string> <string name="keylabel_for_apostrophe">\"</string> <string name="keyhintlabel_for_apostrophe">\'</string> <string name="more_keys_for_apostrophe">\'</string> diff --git a/tools/make-keyboard-text/res/values-fa/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-fa/donottranslate-more-keys.xml index 5a03c803c..ab4fbda44 100644 --- a/tools/make-keyboard-text/res/values-fa/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-fa/donottranslate-more-keys.xml @@ -81,8 +81,8 @@ <string name="keylabel_for_tablet_comma">"،"</string> <string name="keyhintlabel_for_tablet_comma">"!"</string> <string name="more_keys_for_tablet_comma">"!,\\,"</string> - <string name="keyhintlabel_for_tablet_period">"؟"</string> - <string name="more_keys_for_tablet_period">"؟,\?"</string> + <string name="keyhintlabel_for_period">"؟"</string> + <string name="more_keys_for_period">"؟,\?"</string> <string name="keylabel_for_apostrophe">،</string> <string name="keyhintlabel_for_apostrophe">؟</string> <string name="more_keys_for_apostrophe">"!fixedColumnOrder!4,:,!,؟,؛,-,/,«|»,»|«"</string> diff --git a/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml index feaed4c98..a1633316f 100644 --- a/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml @@ -55,6 +55,6 @@ <string name="keylabel_for_currency">₪</string> <string name="keyhintlabel_for_tablet_comma">!</string> <string name="more_keys_for_tablet_comma">!</string> - <string 
name="keyhintlabel_for_tablet_period">\?</string> - <string name="more_keys_for_tablet_period">\?</string> + <string name="keyhintlabel_for_period">\?</string> + <string name="more_keys_for_period">\?</string> </resources> diff --git a/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml index cc09f7fe5..44aa64cbe 100644 --- a/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml @@ -169,9 +169,9 @@ <string name="keylabel_for_tablet_comma">,</string> <string name="keyhintlabel_for_tablet_comma"></string> <string name="more_keys_for_tablet_comma"></string> - <string name="keyhintlabel_for_tablet_period"></string> + <string name="keyhintlabel_for_period"></string> <!-- U+2026: "…" HORIZONTAL ELLIPSIS --> - <string name="more_keys_for_tablet_period">…</string> + <string name="more_keys_for_period">…</string> <string name="keylabel_for_apostrophe">\'</string> <string name="keyhintlabel_for_apostrophe">\"</string> <string name="more_keys_for_apostrophe">\"</string> |