diff options
83 files changed, 1147 insertions, 247 deletions
diff --git a/dictionaries/cs_wordlist.combined.gz b/dictionaries/cs_wordlist.combined.gz Binary files differindex d69ef6451..7829d6573 100644 --- a/dictionaries/cs_wordlist.combined.gz +++ b/dictionaries/cs_wordlist.combined.gz diff --git a/dictionaries/da_wordlist.combined.gz b/dictionaries/da_wordlist.combined.gz Binary files differindex 919d28e1d..e7140195b 100644 --- a/dictionaries/da_wordlist.combined.gz +++ b/dictionaries/da_wordlist.combined.gz diff --git a/dictionaries/de_wordlist.combined.gz b/dictionaries/de_wordlist.combined.gz Binary files differindex f5cce9d5a..6a4bd445c 100644 --- a/dictionaries/de_wordlist.combined.gz +++ b/dictionaries/de_wordlist.combined.gz diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz Binary files differindex afef676b2..839f3efca 100644 --- a/dictionaries/en_GB_wordlist.combined.gz +++ b/dictionaries/en_GB_wordlist.combined.gz diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz Binary files differindex eafbc9d30..5595c75c0 100644 --- a/dictionaries/en_US_wordlist.combined.gz +++ b/dictionaries/en_US_wordlist.combined.gz diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz Binary files differindex 9cbca0b41..69c39d5d9 100644 --- a/dictionaries/en_wordlist.combined.gz +++ b/dictionaries/en_wordlist.combined.gz diff --git a/dictionaries/es_wordlist.combined.gz b/dictionaries/es_wordlist.combined.gz Binary files differindex 53b86076d..0a48b6d11 100644 --- a/dictionaries/es_wordlist.combined.gz +++ b/dictionaries/es_wordlist.combined.gz diff --git a/dictionaries/fi_wordlist.combined.gz b/dictionaries/fi_wordlist.combined.gz Binary files differindex 272011659..eefbfe51a 100644 --- a/dictionaries/fi_wordlist.combined.gz +++ b/dictionaries/fi_wordlist.combined.gz diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz Binary files differindex 1815e4732..1a1832079 100644 --- a/dictionaries/fr_wordlist.combined.gz +++ b/dictionaries/fr_wordlist.combined.gz diff --git a/dictionaries/hr_wordlist.combined.gz b/dictionaries/hr_wordlist.combined.gz Binary files differindex 7694a2ade..864f67651 100644 --- a/dictionaries/hr_wordlist.combined.gz +++ b/dictionaries/hr_wordlist.combined.gz diff --git a/dictionaries/it_wordlist.combined.gz b/dictionaries/it_wordlist.combined.gz Binary files differindex 3b84cd741..dfb175259 100644 --- a/dictionaries/it_wordlist.combined.gz +++ b/dictionaries/it_wordlist.combined.gz diff --git a/dictionaries/lt_wordlist.combined.gz b/dictionaries/lt_wordlist.combined.gz Binary files differindex 316a5af01..029722d95 100644 --- a/dictionaries/lt_wordlist.combined.gz +++ b/dictionaries/lt_wordlist.combined.gz diff --git a/dictionaries/lv_wordlist.combined.gz b/dictionaries/lv_wordlist.combined.gz Binary files differindex b036ac21a..41e1c28bd 100644 --- a/dictionaries/lv_wordlist.combined.gz +++ b/dictionaries/lv_wordlist.combined.gz diff --git a/dictionaries/nb_wordlist.combined.gz b/dictionaries/nb_wordlist.combined.gz Binary files differindex b6e0d42d3..b699912b7 100644 --- a/dictionaries/nb_wordlist.combined.gz +++ b/dictionaries/nb_wordlist.combined.gz diff --git a/dictionaries/nl_wordlist.combined.gz b/dictionaries/nl_wordlist.combined.gz Binary files differindex 48ab0f473..89c238830 100644 --- a/dictionaries/nl_wordlist.combined.gz +++ b/dictionaries/nl_wordlist.combined.gz diff --git a/dictionaries/pl_wordlist.combined.gz b/dictionaries/pl_wordlist.combined.gz Binary files differindex bf02298c1..2b53f69cb 100644 --- a/dictionaries/pl_wordlist.combined.gz +++ b/dictionaries/pl_wordlist.combined.gz diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz Binary files differindex 876eb71c2..2d22447e4 100644 --- a/dictionaries/pt_BR_wordlist.combined.gz +++ b/dictionaries/pt_BR_wordlist.combined.gz diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz Binary files differindex 406869059..1504165d0 100644 --- a/dictionaries/pt_PT_wordlist.combined.gz +++ b/dictionaries/pt_PT_wordlist.combined.gz diff --git a/dictionaries/sl_wordlist.combined.gz b/dictionaries/sl_wordlist.combined.gz Binary files differindex 41a576bde..55e1bb1c8 100644 --- a/dictionaries/sl_wordlist.combined.gz +++ b/dictionaries/sl_wordlist.combined.gz diff --git a/dictionaries/sr_wordlist.combined.gz b/dictionaries/sr_wordlist.combined.gz Binary files differindex dec6ae89e..8488a08b4 100644 --- a/dictionaries/sr_wordlist.combined.gz +++ b/dictionaries/sr_wordlist.combined.gz diff --git a/dictionaries/sv_wordlist.combined.gz b/dictionaries/sv_wordlist.combined.gz Binary files differindex 0471772e7..63425206e 100644 --- a/dictionaries/sv_wordlist.combined.gz +++ b/dictionaries/sv_wordlist.combined.gz diff --git a/dictionaries/tr_wordlist.combined.gz b/dictionaries/tr_wordlist.combined.gz Binary files differindex fae79ca21..0251778c7 100644 --- a/dictionaries/tr_wordlist.combined.gz +++ b/dictionaries/tr_wordlist.combined.gz diff --git a/java/res/raw/main_de.dict b/java/res/raw/main_de.dict Binary files differindex 5d35e64a2..69796bbaa 100644 --- a/java/res/raw/main_de.dict +++ b/java/res/raw/main_de.dict diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict Binary files differindex 8660c28e2..bef6b1005 100644 --- a/java/res/raw/main_en.dict +++ b/java/res/raw/main_en.dict diff --git a/java/res/raw/main_es.dict b/java/res/raw/main_es.dict Binary files differindex f5906c2e2..261ab8c87 100644 --- a/java/res/raw/main_es.dict +++ b/java/res/raw/main_es.dict diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict Binary files differindex 0d2e51837..18f529887 100644 --- a/java/res/raw/main_fr.dict +++ b/java/res/raw/main_fr.dict diff --git a/java/res/raw/main_it.dict b/java/res/raw/main_it.dict Binary files differindex 523f645c7..e161c2475 100644 --- a/java/res/raw/main_it.dict +++ b/java/res/raw/main_it.dict diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict Binary files differindex 98a27c7c8..21bbe7c67 100644 --- a/java/res/raw/main_pt_br.dict +++ b/java/res/raw/main_pt_br.dict diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml index 69da1e862..06f4b4789 100644 --- a/java/res/values/strings.xml +++ b/java/res/values/strings.xml @@ -174,6 +174,11 @@ <!-- Spoken description when there is no text entered --> <string name="spoken_no_text_entered">No text entered</string> + <!-- Spoken description to let the user know what auto-correction will be performed when a key is pressed. --> + <string name="spoken_auto_correct"><xliff:g id="key" example="Space">%1$s</xliff:g> corrects <xliff:g id="original">%2$s</xliff:g> to <xliff:g id="corrected">%3$s</xliff:g></string> + <!-- Spoken description used during obscured (e.g. password) entry to let the user know that auto-correction will be performed when a key is pressed. --> + <string name="spoken_auto_correct_obscured"><xliff:g id="key" example="Space">%1$s</xliff:g> has auto-correction</string> + <!-- Spoken description for unknown keyboard keys. --> <string name="spoken_description_unknown">Key code %d</string> <!-- Spoken description for the "Shift" keyboard key when "Shift" is off. --> diff --git a/java/src/com/android/inputmethod/accessibility/AccessibilityEntityProvider.java b/java/src/com/android/inputmethod/accessibility/AccessibilityEntityProvider.java index 7639432aa..c628c5b09 100644 --- a/java/src/com/android/inputmethod/accessibility/AccessibilityEntityProvider.java +++ b/java/src/com/android/inputmethod/accessibility/AccessibilityEntityProvider.java @@ -35,6 +35,8 @@ import android.view.inputmethod.EditorInfo; import com.android.inputmethod.keyboard.Key; import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.KeyboardView; +import com.android.inputmethod.latin.settings.Settings; +import com.android.inputmethod.latin.settings.SettingsValues; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CoordinateUtils; @@ -285,9 +287,15 @@ public final class AccessibilityEntityProvider extends AccessibilityNodeProvider private String getKeyDescription(final Key key) { final EditorInfo editorInfo = mInputMethodService.getCurrentInputEditorInfo(); final boolean shouldObscure = mAccessibilityUtils.shouldObscureInput(editorInfo); - final String keyDescription = mKeyCodeDescriptionMapper.getDescriptionForKey( + final SettingsValues currentSettings = Settings.getInstance().getCurrent(); + final String keyCodeDescription = mKeyCodeDescriptionMapper.getDescriptionForKey( mKeyboardView.getContext(), mKeyboardView.getKeyboard(), key, shouldObscure); - return keyDescription; + if (currentSettings.isWordSeparator(key.getCode())) { + return mAccessibilityUtils.getAutoCorrectionDescription( + keyCodeDescription, shouldObscure); + } else { + return keyCodeDescription; + } } /** diff --git a/java/src/com/android/inputmethod/accessibility/AccessibilityUtils.java b/java/src/com/android/inputmethod/accessibility/AccessibilityUtils.java index 8929dc7e9..10fb9fef4 100644 --- a/java/src/com/android/inputmethod/accessibility/AccessibilityUtils.java +++ b/java/src/com/android/inputmethod/accessibility/AccessibilityUtils.java @@ -23,6 +23,7 @@ import android.os.Build; import android.os.SystemClock; import android.provider.Settings; import android.support.v4.view.accessibility.AccessibilityEventCompat; +import android.text.TextUtils; import android.util.Log; import android.view.MotionEvent; import android.view.View; @@ -34,6 +35,7 @@ import android.view.inputmethod.EditorInfo; import com.android.inputmethod.compat.SettingsSecureCompatUtils; import com.android.inputmethod.latin.R; +import com.android.inputmethod.latin.SuggestedWords; import com.android.inputmethod.latin.utils.InputTypeUtils; public final class AccessibilityUtils { @@ -48,6 +50,12 @@ public final class AccessibilityUtils { private AccessibilityManager mAccessibilityManager; private AudioManager mAudioManager; + /** The most recent auto-correction. */ + private String mAutoCorrectionWord; + + /** The most recent typed word for auto-correction. */ + private String mTypedWord; + /* * Setting this constant to {@code false} will disable all keyboard * accessibility code, regardless of whether Accessibility is turned on in @@ -142,6 +150,51 @@ public final class AccessibilityUtils { } /** + * Sets the current auto-correction word and typed word. These may be used + * to provide the user with a spoken description of what auto-correction + * will occur when a key is typed. + * + * @param suggestedWords the list of suggested auto-correction words + * @param typedWord the currently typed word + */ + public void setAutoCorrection(final SuggestedWords suggestedWords, final String typedWord) { + if (suggestedWords != null && suggestedWords.mWillAutoCorrect) { + mAutoCorrectionWord = suggestedWords.getWord(SuggestedWords.INDEX_OF_AUTO_CORRECTION); + mTypedWord = typedWord; + } else { + mAutoCorrectionWord = null; + mTypedWord = null; + } + } + + /** + * Obtains a description for an auto-correction key, taking into account the + * currently typed word and auto-correction. + * + * @param keyCodeDescription spoken description of the key that will insert + * an auto-correction + * @param shouldObscure whether the key should be obscured + * @return a description including a description of the auto-correction, if + * needed + */ + public String getAutoCorrectionDescription( + final String keyCodeDescription, final boolean shouldObscure) { + if (!TextUtils.isEmpty(mAutoCorrectionWord)) { + if (!TextUtils.equals(mAutoCorrectionWord, mTypedWord)) { + if (shouldObscure) { + // This should never happen, but just in case... + return mContext.getString(R.string.spoken_auto_correct_obscured, + keyCodeDescription); + } + return mContext.getString(R.string.spoken_auto_correct, keyCodeDescription, + mTypedWord, mAutoCorrectionWord); + } + } + + return keyCodeDescription; + } + + /** * Sends the specified text to the {@link AccessibilityManager} to be * spoken. * diff --git a/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java b/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java index 61dc56ed1..eb48d01f6 100644 --- a/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java +++ b/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java @@ -718,12 +718,14 @@ public final class EmojiKeyboardView extends LinearLayout implements OnTabChange @Override public void run() { + int repeatCount = 1; int timeCount = 0; while (timeCount < MAX_REPEAT_COUNT_TIME && !mAborted) { if (timeCount > mKeyRepeatStartTimeout) { - pressDelete(); + pressDelete(repeatCount); } timeCount += mKeyRepeatInterval; + ++repeatCount; try { Thread.sleep(mKeyRepeatInterval); } catch (InterruptedException e) { @@ -736,9 +738,9 @@ public final class EmojiKeyboardView extends LinearLayout implements OnTabChange } } - public void pressDelete() { + public void pressDelete(int repeatCount) { mKeyboardActionListener.onPressKey( - Constants.CODE_DELETE, 0 /* repeatCount */, true /* isSinglePointer */); + Constants.CODE_DELETE, repeatCount, true /* isSinglePointer */); mKeyboardActionListener.onCodeInput( Constants.CODE_DELETE, NOT_A_COORDINATE, NOT_A_COORDINATE); mKeyboardActionListener.onReleaseKey( @@ -754,7 +756,7 @@ public final class EmojiKeyboardView extends LinearLayout implements OnTabChange switch(event.getAction()) { case MotionEvent.ACTION_DOWN: v.setBackgroundColor(mDeleteKeyPressedBackgroundColor); - pressDelete(); + pressDelete(0 /* repeatCount */); startRepeat(); return true; case MotionEvent.ACTION_UP: diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java index 845a9b987..4a0ce3735 100644 --- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java @@ -25,6 +25,7 @@ import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import java.io.File; import java.io.IOException; +import java.util.Map; // TODO: Quit extending Dictionary after implementing dynamic binary dictionary. abstract public class AbstractDictionaryWriter extends Dictionary { @@ -50,16 +51,16 @@ abstract public class AbstractDictionaryWriter extends Dictionary { abstract public void removeBigramWords(final String word0, final String word1); - abstract protected void writeDictionary(final DictEncoder dictEncoder) - throws IOException, UnsupportedFormatException; + abstract protected void writeDictionary(final DictEncoder dictEncoder, + final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException; - public void write(final String fileName) { + public void write(final String fileName, final Map<String, String> attributeMap) { final String tempFileName = fileName + ".temp"; final File file = new File(mContext.getFilesDir(), fileName); final File tempFile = new File(mContext.getFilesDir(), tempFileName); try { final DictEncoder dictEncoder = new Ver3DictEncoder(tempFile); - writeDictionary(dictEncoder); + writeDictionary(dictEncoder, attributeMap); tempFile.renameTo(file); } catch (IOException e) { Log.e(TAG, "IO exception while writing file", e); diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 6ec7aeec3..29c6c0451 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -115,7 +115,7 @@ public final class BinaryDictionary extends Dictionary { private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); private static native void flushNative(long dict, String filePath); - private static native boolean needsToRunGCNative(long dict); + private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); private static native void flushWithGCNative(long dict, String filePath); private static native void closeNative(long dict); private static native int getProbabilityNative(long dict, int[] word); @@ -270,7 +270,7 @@ public final class BinaryDictionary extends Dictionary { } private void runGCIfRequired() { - if (needsToRunGCNative(mNativeDict)) { + if (needsToRunGC(true /* mindsBlockByGC */)) { flushWithGC(); } } @@ -326,9 +326,15 @@ public final class BinaryDictionary extends Dictionary { reopen(); } - public boolean needsToRunGC() { + /** + * Checks whether GC is needed to run or not. + * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about + * the blocking in some situations such as in idle time or just before closing. + * @return whether GC is needed to run or not. + */ + public boolean needsToRunGC(final boolean mindsBlockByGC) { if (!isValidDictionary()) return false; - return needsToRunGCNative(mNativeDict); + return needsToRunGCNative(mNativeDict, mindsBlockByGC); } @UsedForTesting diff --git a/java/src/com/android/inputmethod/latin/DictionaryFactory.java b/java/src/com/android/inputmethod/latin/DictionaryFactory.java index 3721132c5..828e54f14 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryFactory.java +++ b/java/src/com/android/inputmethod/latin/DictionaryFactory.java @@ -51,7 +51,7 @@ public final class DictionaryFactory { if (null == locale) { Log.e(TAG, "No locale defined for dictionary"); return new DictionaryCollection(Dictionary.TYPE_MAIN, - createBinaryDictionary(context, locale)); + createReadOnlyBinaryDictionary(context, locale)); } final LinkedList<Dictionary> dictList = CollectionUtils.newLinkedList(); @@ -59,11 +59,11 @@ public final class DictionaryFactory { BinaryDictionaryGetter.getDictionaryFiles(locale, context); if (null != assetFileList) { for (final AssetFileAddress f : assetFileList) { - final BinaryDictionary binaryDictionary = new BinaryDictionary(f.mFilename, - f.mOffset, f.mLength, useFullEditDistance, locale, Dictionary.TYPE_MAIN, - false /* isUpdatable */); - if (binaryDictionary.isValidDictionary()) { - dictList.add(binaryDictionary); + final ReadOnlyBinaryDictionary readOnlyBinaryDictionary = + new ReadOnlyBinaryDictionary(f.mFilename, f.mOffset, f.mLength, + useFullEditDistance, locale, Dictionary.TYPE_MAIN); + if (readOnlyBinaryDictionary.isValidDictionary()) { + dictList.add(readOnlyBinaryDictionary); } } } @@ -89,12 +89,12 @@ public final class DictionaryFactory { } /** - * Initializes a dictionary from a raw resource file + * Initializes a read-only binary dictionary from a raw resource file * @param context application context for reading resources * @param locale the locale to use for the resource - * @return an initialized instance of BinaryDictionary + * @return an initialized instance of ReadOnlyBinaryDictionary */ - protected static BinaryDictionary createBinaryDictionary(final Context context, + protected static ReadOnlyBinaryDictionary createReadOnlyBinaryDictionary(final Context context, final Locale locale) { AssetFileDescriptor afd = null; try { @@ -113,9 +113,8 @@ public final class DictionaryFactory { Log.e(TAG, "sourceDir is not a file: " + sourceDir); return null; } - return new BinaryDictionary(sourceDir, afd.getStartOffset(), afd.getLength(), - false /* useFullEditDistance */, locale, Dictionary.TYPE_MAIN, - false /* isUpdatable */); + return new ReadOnlyBinaryDictionary(sourceDir, afd.getStartOffset(), afd.getLength(), + false /* useFullEditDistance */, locale, Dictionary.TYPE_MAIN); } catch (android.content.res.Resources.NotFoundException e) { Log.e(TAG, "Could not find the resource"); return null; @@ -142,10 +141,10 @@ public final class DictionaryFactory { final DictionaryCollection dictionaryCollection = new DictionaryCollection(Dictionary.TYPE_MAIN); for (final AssetFileAddress address : dictionaryList) { - final BinaryDictionary binaryDictionary = new BinaryDictionary(address.mFilename, - address.mOffset, address.mLength, useFullEditDistance, locale, - Dictionary.TYPE_MAIN, false /* isUpdatable */); - dictionaryCollection.addDictionary(binaryDictionary); + final ReadOnlyBinaryDictionary readOnlyBinaryDictionary = new ReadOnlyBinaryDictionary( + address.mFilename, address.mOffset, address.mLength, useFullEditDistance, + locale, Dictionary.TYPE_MAIN); + dictionaryCollection.addDictionary(readOnlyBinaryDictionary); } return dictionaryCollection; } diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java index 5a453dde5..84abfa66d 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java @@ -31,6 +31,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.Map; /** * An in memory dictionary for memorizing entries and writing a binary dictionary. @@ -84,8 +85,11 @@ public class DictionaryWriter extends AbstractDictionaryWriter { } @Override - protected void writeDictionary(final DictEncoder dictEncoder) - throws IOException, UnsupportedFormatException { + protected void writeDictionary(final DictEncoder dictEncoder, + final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException { + for (final Map.Entry<String, String> entry : attributeMap.entrySet()) { + mFusionDictionary.addOptionAttribute(entry.getKey(), entry.getValue()); + } dictEncoder.writeDictionary(mFusionDictionary, FORMAT_OPTIONS); } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index d3da068bd..2d1ca51e2 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -236,6 +236,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { HashMap<String, String> attributeMap = new HashMap<String, String>(); attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, SUPPORTS_DYNAMIC_UPDATE); + attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, mFilename); return attributeMap; } @@ -496,7 +497,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (needsToReloadBeforeWriting()) { mDictionaryWriter.clear(); loadDictionaryAsync(); - mDictionaryWriter.write(mFilename); + mDictionaryWriter.write(mFilename, getHeaderAttributeMap()); } else { if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { if (mBinaryDictionary == null || !mBinaryDictionary.isValidDictionary()) { @@ -504,14 +505,14 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); } else { - if (mBinaryDictionary.needsToRunGC()) { + if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { mBinaryDictionary.flushWithGC(); } else { mBinaryDictionary.flush(); } } } else { - mDictionaryWriter.write(mFilename); + mDictionaryWriter.write(mFilename, getHeaderAttributeMap()); } } } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 5fbbc1764..96e16de0d 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -81,7 +81,7 @@ import com.android.inputmethod.latin.personalization.PersonalizationDictionary; import com.android.inputmethod.latin.personalization.PersonalizationDictionarySessionRegister; import com.android.inputmethod.latin.personalization.PersonalizationHelper; import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary; -import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary; +import com.android.inputmethod.latin.personalization.UserHistoryDictionary; import com.android.inputmethod.latin.settings.Settings; import com.android.inputmethod.latin.settings.SettingsActivity; import com.android.inputmethod.latin.settings.SettingsValues; @@ -179,7 +179,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen private boolean mIsMainDictionaryAvailable; private UserBinaryDictionary mUserDictionary; - private UserHistoryPredictionDictionary mUserHistoryPredictionDictionary; + private UserHistoryDictionary mUserHistoryDictionary; private PersonalizationPredictionDictionary mPersonalizationPredictionDictionary; private PersonalizationDictionary mPersonalizationDictionary; private boolean mIsUserDictionaryAvailable; @@ -623,9 +623,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); - mUserHistoryPredictionDictionary = PersonalizationHelper - .getUserHistoryPredictionDictionary(this, localeStr, prefs); - newSuggest.setUserHistoryPredictionDictionary(mUserHistoryPredictionDictionary); + mUserHistoryDictionary = PersonalizationHelper.getUserHistoryDictionary( + this, localeStr, prefs); + newSuggest.setUserHistoryDictionary(mUserHistoryDictionary); mPersonalizationDictionary = PersonalizationHelper .getPersonalizationDictionary(this, localeStr, prefs); newSuggest.setPersonalizationDictionary(mPersonalizationDictionary); @@ -2557,6 +2557,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen private void showSuggestionStripWithTypedWord(final SuggestedWords suggestedWords, final String typedWord) { if (suggestedWords.isEmpty()) { + // No auto-correction is available, clear the cached values. + AccessibilityUtils.getInstance().setAutoCorrection(null, null); clearSuggestionStrip(); return; } @@ -2565,6 +2567,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen setSuggestedWords(suggestedWords, isAutoCorrection); setAutoCorrectionIndicator(isAutoCorrection); setSuggestionStripShown(isSuggestionsStripVisible()); + // An auto-correction is available, cache it in accessibility code so + // we can be speak it if the user touches a key that will insert it. + AccessibilityUtils.getInstance().setAutoCorrection(suggestedWords, typedWord); } private void showSuggestionStrip(final SuggestedWords suggestedWords) { @@ -2750,9 +2755,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen final SettingsValues currentSettings = mSettings.getCurrent(); if (!currentSettings.mCorrectionEnabled) return null; - final UserHistoryPredictionDictionary userHistoryPredictionDictionary = - mUserHistoryPredictionDictionary; - if (userHistoryPredictionDictionary == null) return null; + final UserHistoryDictionary userHistoryDictionary = mUserHistoryDictionary; + if (userHistoryDictionary == null) return null; final String prevWord = mConnection.getNthPreviousWord(currentSettings.mWordSeparators, 2); final String secondWord; @@ -2766,8 +2770,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen final int maxFreq = AutoCorrectionUtils.getMaxFrequency( suggest.getUnigramDictionaries(), suggestion); if (maxFreq == 0) return null; - userHistoryPredictionDictionary - .addToPersonalizationPredictionDictionary(prevWord, secondWord, maxFreq > 0); + userHistoryDictionary.addToDictionary(prevWord, secondWord, maxFreq > 0); return prevWord; } @@ -2953,7 +2956,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } mConnection.deleteSurroundingText(deleteLength, 0); if (!TextUtils.isEmpty(previousWord) && !TextUtils.isEmpty(committedWord)) { - mUserHistoryPredictionDictionary.cancelAddingUserHistory(previousWord, committedWord); + mUserHistoryDictionary.cancelAddingUserHistory(previousWord, committedWord); } final String stringToCommit = originallyTypedWord + mLastComposedWord.mSeparatorString; if (mSettings.getCurrent().mCurrentLanguageHasSpaces) { diff --git a/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java new file mode 100644 index 000000000..68505ce38 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import com.android.inputmethod.keyboard.ProximityInfo; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; + +import java.util.ArrayList; +import java.util.Locale; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This class provides binary dictionary reading operations with locking. An instance of this class + * can be used by multiple threads. Note that different session IDs must be used when multiple + * threads get suggestions using this class. + */ +public final class ReadOnlyBinaryDictionary extends Dictionary { + /** + * A lock for accessing binary dictionary. Only closing binary dictionary is the operation + * that change the state of dictionary. + */ + private final ReentrantReadWriteLock mLock = new ReentrantReadWriteLock(); + + private final BinaryDictionary mBinaryDictionary; + + public ReadOnlyBinaryDictionary(final String filename, final long offset, final long length, + final boolean useFullEditDistance, final Locale locale, final String dictType) { + super(dictType); + mBinaryDictionary = new BinaryDictionary(filename, offset, length, useFullEditDistance, + locale, dictType, false /* isUpdatable */); + } + + public boolean isValidDictionary() { + return mBinaryDictionary.isValidDictionary(); + } + + @Override + public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, + final String prevWord, final ProximityInfo proximityInfo, + final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) { + return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords, + additionalFeaturesOptions, 0 /* sessionId */); + } + + @Override + public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer, + final String prevWord, final ProximityInfo proximityInfo, + final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, + final int sessionId) { + if (mLock.readLock().tryLock()) { + try { + return mBinaryDictionary.getSuggestions(composer, prevWord, proximityInfo, + blockOffensiveWords, additionalFeaturesOptions); + } finally { + mLock.readLock().unlock(); + } + } + return null; + } + + @Override + public boolean isValidWord(final String word) { + if (mLock.readLock().tryLock()) { + try { + return mBinaryDictionary.isValidWord(word); + } finally { + mLock.readLock().unlock(); + } + } + return false; + } + + @Override + public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { + if (mLock.readLock().tryLock()) { + try { + return mBinaryDictionary.shouldAutoCommit(candidate); + } finally { + mLock.readLock().unlock(); + } + } + return false; + } + + @Override + public int getFrequency(final String word) { + if (mLock.readLock().tryLock()) { + try { + return mBinaryDictionary.getFrequency(word); + } finally { + mLock.readLock().unlock(); + } + } + return NOT_A_PROBABILITY; + } + + @Override + public void close() { + mLock.writeLock().lock(); + try { + mBinaryDictionary.close(); + } finally { + mLock.writeLock().unlock(); + } + } +} diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 6c18c948f..9fd1f53a2 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -26,7 +26,7 @@ import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.personalization.PersonalizationDictionary; import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary; -import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary; +import com.android.inputmethod.latin.personalization.UserHistoryDictionary; import com.android.inputmethod.latin.settings.Settings; import com.android.inputmethod.latin.utils.AutoCorrectionUtils; import com.android.inputmethod.latin.utils.BoundedTreeSet; @@ -190,10 +190,8 @@ public final class Suggest { addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary); } - public void setUserHistoryPredictionDictionary( - final UserHistoryPredictionDictionary userHistoryPredictionDictionary) { - addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, - userHistoryPredictionDictionary); + public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) { + addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); } public void setPersonalizationPredictionDictionary( diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 6cc0bfb76..af61f2979 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -385,12 +385,14 @@ public class BinaryDictEncoderUtils { nodeSize + size, ptNode.mChildren)); } nodeSize += getShortcutListSize(ptNode.mShortcutTargets); - if (null != ptNode.mBigrams) { - for (WeightedString bigram : ptNode.mBigrams) { - final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, - nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE, - FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord)); - nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; + if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) { + if (null != ptNode.mBigrams) { + for (WeightedString bigram : ptNode.mBigrams) { + final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, + nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE, + FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord)); + nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; + } } } ptNode.mCachedSize = nodeSize; diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index bf3d19101..411e265b3 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -77,7 +77,7 @@ public final class DynamicBinaryDictIOUtils { * @param newParentAddress the absolute address of the parent. * @param formatOptions file format options. */ - public static void updateParentAddress(final DictBuffer dictBuffer, + private static void updateParentAddress(final DictBuffer dictBuffer, final int ptNodeOriginAddress, final int newParentAddress, final FormatOptions formatOptions) { final int originalPosition = dictBuffer.position(); @@ -109,7 +109,7 @@ public final class DynamicBinaryDictIOUtils { * @param newParentAddress the address to be written. * @param formatOptions file format options. */ - public static void updateParentAddresses(final DictBuffer dictBuffer, + private static void updateParentAddresses(final DictBuffer dictBuffer, final int ptNodeOriginAddress, final int newParentAddress, final FormatOptions formatOptions) { final int originalPosition = dictBuffer.position(); @@ -136,7 +136,7 @@ public final class DynamicBinaryDictIOUtils { * @param newChildrenAddress the absolute address of the child. * @param formatOptions file format options. */ - public static void updateChildrenAddress(final DictBuffer dictBuffer, + private static void updateChildrenAddress(final DictBuffer dictBuffer, final int ptNodeOriginAddress, final int newChildrenAddress, final FormatOptions formatOptions) { final int originalPosition = dictBuffer.position(); diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 849bff050..9481a8c14 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -265,8 +265,12 @@ public final class FormatSpec { static final String FREQ_FILE_EXTENSION = ".freq"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; + static final String BIGRAM_FILE_EXTENSION = ".bigram"; + static final String BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup"; + static final String BIGRAM_ADDRESS_TABLE_FILE_EXTENSION = ".bigram_index"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; + static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; @@ -331,9 +335,9 @@ public final class FormatSpec { public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE"; public static final String ATTRIBUTE_VALUE_TRUE = "1"; - private static final String DICTIONARY_VERSION_ATTRIBUTE = "version"; - private static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale"; - private static final String DICTIONARY_ID_ATTRIBUTE = "dictionary"; + public static final String DICTIONARY_VERSION_ATTRIBUTE = "version"; + public static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale"; + public static final String DICTIONARY_ID_ATTRIBUTE = "dictionary"; private static final String DICTIONARY_DESCRIPTION_ATTRIBUTE = "description"; public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions, final FormatOptions formatOptions) { diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java index 0b9cf91d2..96d057a44 100644 --- a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java @@ -18,6 +18,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; @@ -147,4 +150,45 @@ public class SparseTable { BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4); } } + + @UsedForTesting + public void writeToFiles(final File lookupTableFile, final File contentFile) + throws IOException { + FileOutputStream lookupTableOutStream = null; + FileOutputStream contentOutStream = null; + try { + lookupTableOutStream = new FileOutputStream(lookupTableFile); + contentOutStream = new FileOutputStream(contentFile); + write(lookupTableOutStream, contentOutStream); + } finally { + if (lookupTableOutStream != null) { + lookupTableOutStream.close(); + } + if (contentOutStream != null) { + contentOutStream.close(); + } + } + } + + private static byte[] readFileToByteArray(final File file) throws IOException { + final byte[] contents = new byte[(int) file.length()]; + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + inStream.read(contents); + } finally { + if (inStream != null) { + inStream.close(); + } + } + return contents; + } + + @UsedForTesting + public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile, + final int blockSize) throws IOException { + final byte[] lookupTable = readFileToByteArray(lookupTableFile); + final byte[] content = readFileToByteArray(contentFile); + return new SparseTable(lookupTable, content, blockSize); + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 4c8ff8ea4..0aa431966 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -42,12 +42,15 @@ public class Ver4DictDecoder extends DictDecoder { private static final int FILETYPE_TRIE = 1; private static final int FILETYPE_FREQUENCY = 2; private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3; + private static final int FILETYPE_BIGRAM = 4; private final File mDictDirectory; private final DictionaryBufferFactory mBufferFactory; private DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; private DictBuffer mTerminalAddressTableBuffer; + private DictBuffer mBigramBuffer; + private SparseTable mBigramAddressTable; @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { @@ -82,6 +85,9 @@ public class Ver4DictDecoder extends DictDecoder { } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + } else if (fileType == FILETYPE_BIGRAM) { + return new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION); } else { throw new RuntimeException("Unsupported kind of file : " + fileType); } @@ -94,6 +100,8 @@ public class Ver4DictDecoder extends DictDecoder { mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); + mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM)); + loadBigramAddressSparseTable(); } @Override @@ -118,6 +126,15 @@ public class Ver4DictDecoder extends DictDecoder { return header; } + private void loadBigramAddressSparseTable() throws IOException { + final File lookupIndexFile = new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); + final File contentFile = new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); + mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, contentFile, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); + } + protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); @@ -191,8 +208,21 @@ public class Ver4DictDecoder extends DictDecoder { final ArrayList<PendingAttribute> bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList<PendingAttribute>(); - addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, - addressPointer); + final int posOfBigrams = mBigramAddressTable.get(terminalId); + mBigramBuffer.position(posOfBigrams); + while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, + // remaining bigram entries are ignored. + final int bigramFlags = mBigramBuffer.readUnsignedByte(); + final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); + mTerminalAddressTableBuffer.position( + targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); + final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); + bigrams.add(new PendingAttribute( + bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + targetAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { MakedictLog.d("too many bigrams in a node."); } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 4fb89671f..4c25faf88 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -26,6 +26,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -43,9 +44,13 @@ public class Ver4DictEncoder implements DictEncoder { private byte[] mTrieBuf; private int mTriePos; private int mHeaderSize; + private SparseTable mBigramAddressTable; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; private OutputStream mTerminalAddressTableOutStream; + private OutputStream mBigramOutStream; + private File mDictDir; + private String mBaseFilename; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { @@ -55,12 +60,14 @@ public class Ver4DictEncoder implements DictEncoder { private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions) throws FileNotFoundException, IOException { final FileHeader header = new FileHeader(0, dictOptions, formatOptions); - final String filename = header.getId() + "." + header.getVersion(); - final File mDictDir = new File(mDictPlacedDir, filename); - final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); - final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); + mBaseFilename = header.getId() + "." + header.getVersion(); + mDictDir = new File(mDictPlacedDir, mBaseFilename); + final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); + final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, - filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + final File bigramFile = new File(mDictDir, + mBaseFilename + FormatSpec.BIGRAM_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); @@ -71,6 +78,7 @@ public class Ver4DictEncoder implements DictEncoder { mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); + mBigramOutStream = new FileOutputStream(bigramFile); } private void close() throws IOException { @@ -84,10 +92,14 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } + if (mBigramOutStream != null) { + mBigramOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; mTerminalAddressTableOutStream = null; + mBigramOutStream = null; } } @@ -123,6 +135,10 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); + mBigramAddressTable = new SparseTable(terminalCount, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); + writeBigrams(flatNodes, dict); + writeBigramAddressSparseTable(); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; @@ -230,24 +246,41 @@ public class Ver4DictEncoder implements DictEncoder { shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); } - private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) { - if (bigrams == null) return; - - final Iterator<WeightedString> bigramIterator = bigrams.iterator(); - while (bigramIterator.hasNext()) { - final WeightedString bigram = bigramIterator.next(); - final PtNode target = - FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); - final int addressOfBigram = target.mCachedAddressAfterUpdate; - final int unigramFrequencyForThisWord = target.mFrequency; - final int offset = addressOfBigram - - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), - offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); - mTrieBuf[mTriePos++] = (byte) bigramFlags; - mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, - mTriePos, Math.abs(offset)); + private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict) + throws IOException { + final ByteArrayOutputStream bigramBuffer = new ByteArrayOutputStream(); + + for (final PtNodeArray nodeArray : flatNodes) { + for (final PtNode ptNode : nodeArray.mData) { + if (ptNode.mBigrams != null) { + final int startPos = bigramBuffer.size(); + mBigramAddressTable.set(ptNode.mTerminalId, startPos); + final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator(); + while (bigramIterator.hasNext()) { + final WeightedString bigram = bigramIterator.next(); + final PtNode target = + FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); + final int unigramFrequencyForThisWord = target.mFrequency; + final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags( + bigramIterator.hasNext(), 0, bigram.mFrequency, + unigramFrequencyForThisWord, bigram.mWord); + BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, bigramFlags, + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, target.mTerminalId, + FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); + } + } + } } + bigramBuffer.writeTo(mBigramOutStream); + } + + private void writeBigramAddressSparseTable() throws IOException { + final File lookupIndexFile = + new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); + final File contentFile = + new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); + mBigramAddressTable.writeToFiles(lookupIndexFile, contentFile); } @Override @@ -267,7 +300,6 @@ public class Ver4DictEncoder implements DictEncoder { } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); - writeBigrams(ptNode.mBigrams, dict); } private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes, diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index 66517a800..7cf4f0c88 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -22,6 +22,7 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; +import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary; import com.android.inputmethod.latin.LatinImeLogger; import com.android.inputmethod.latin.makedict.DictDecoder; @@ -50,6 +51,9 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB /** Any pair being typed or picked */ public static final int FREQUENCY_FOR_TYPED = 2; + public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED; + public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY; + /** Locale for which this user history dictionary is storing words */ private final String mLocale; @@ -94,6 +98,8 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, mFileName); + attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, mLocale); return attributeMap; } @@ -117,27 +123,29 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB } /** - * Pair will be added to the personalization prediction dictionary. + * Pair will be added to the decaying dictionary. * * The first word may be null. That means we don't know the context, in other words, * it's only a unigram. The first word may also be an empty string : this means start * context, as in beginning of a sentence for example. * The second word may not be null (a NullPointerException would be thrown). */ - public void addToPersonalizationPredictionDictionary( - final String word0, final String word1, final boolean isValid) { + public void addToDictionary(final String word0, final String word1, final boolean isValid) { if (word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH || (word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) { return; } - addWordDynamically(word1, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED, + final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ? + (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) : + FREQUENCY_FOR_TYPED; + addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency, false /* isNotAWord */); // Do not insert a word as a bigram of itself if (word1.equals(word0)) { return; } if (null != word0) { - addBigramDynamically(word0, word1, FREQUENCY_FOR_TYPED, isValid); + addBigramDynamically(word0, word1, frequency, isValid); } } diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java index 305088536..039b25337 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java @@ -36,6 +36,7 @@ import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.Forge import java.io.IOException; import java.util.ArrayList; +import java.util.Map; // Currently this class is used to implement dynamic prodiction dictionary. // TODO: Move to native code. @@ -113,8 +114,8 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr } @Override - protected void writeDictionary(final DictEncoder dictEncoder) - throws IOException, UnsupportedFormatException { + protected void writeDictionary(final DictEncoder dictEncoder, + final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException { UserHistoryDictIOUtils.writeDictionary(dictEncoder, new FrequencyProvider(mBigramList, mExpandableDictionary, mMaxHistoryBigrams), mBigramList, FORMAT_OPTIONS); diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java index c616a296c..a86f6e584 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java @@ -110,7 +110,7 @@ public abstract class PersonalizationDictionaryUpdateSession { if (dictionary == null) { return; } - dictionary.addToPersonalizationPredictionDictionary(word0, word1, isValid); + dictionary.addToDictionary(word0, word1, isValid); } // Bulk import @@ -122,8 +122,7 @@ public abstract class PersonalizationDictionaryUpdateSession { return; } for (final PersonalizationLanguageModelParam lmParam : lmParams) { - dictionary.addToPersonalizationPredictionDictionary( - lmParam.mWord0, lmParam.mWord1, lmParam.mIsValid); + dictionary.addToDictionary(lmParam.mWord0, lmParam.mWord1, lmParam.mIsValid); } } } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java index 5f702ee3f..8c9484b12 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java @@ -30,7 +30,7 @@ public class PersonalizationHelper { private static final String TAG = PersonalizationHelper.class.getSimpleName(); private static final boolean DEBUG = false; - private static final ConcurrentHashMap<String, SoftReference<UserHistoryPredictionDictionary>> + private static final ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>> sLangUserHistoryDictCache = CollectionUtils.newConcurrentHashMap(); private static final ConcurrentHashMap<String, SoftReference<PersonalizationDictionary>> @@ -41,25 +41,23 @@ public class PersonalizationHelper { sLangPersonalizationPredictionDictCache = CollectionUtils.newConcurrentHashMap(); - public static UserHistoryPredictionDictionary getUserHistoryPredictionDictionary( + public static UserHistoryDictionary getUserHistoryDictionary( final Context context, final String locale, final SharedPreferences sp) { synchronized (sLangUserHistoryDictCache) { if (sLangUserHistoryDictCache.containsKey(locale)) { - final SoftReference<UserHistoryPredictionDictionary> ref = + final SoftReference<UserHistoryDictionary> ref = sLangUserHistoryDictCache.get(locale); - final UserHistoryPredictionDictionary dict = ref == null ? null : ref.get(); + final UserHistoryDictionary dict = ref == null ? null : ref.get(); if (dict != null) { if (DEBUG) { - Log.w(TAG, "Use cached UserHistoryPredictionDictionary for " + locale); + Log.w(TAG, "Use cached UserHistoryDictionary for " + locale); } dict.reloadDictionaryIfRequired(); return dict; } } - final UserHistoryPredictionDictionary dict = - new UserHistoryPredictionDictionary(context, locale, sp); - sLangUserHistoryDictCache.put( - locale, new SoftReference<UserHistoryPredictionDictionary>(dict)); + final UserHistoryDictionary dict = new UserHistoryDictionary(context, locale, sp); + sLangUserHistoryDictCache.put(locale, new SoftReference<UserHistoryDictionary>(dict)); return dict; } } diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java index 38e308a4e..a60226d7e 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java @@ -26,10 +26,10 @@ import android.content.SharedPreferences; * Locally gathers stats about the words user types and various other signals like auto-correction * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. */ -public class UserHistoryPredictionDictionary extends DecayingExpandableBinaryDictionaryBase { +public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase { /* package for tests */ static final String NAME = - UserHistoryPredictionDictionary.class.getSimpleName(); - /* package */ UserHistoryPredictionDictionary(final Context context, final String locale, + UserHistoryDictionary.class.getSimpleName(); + /* package */ UserHistoryDictionary(final Context context, final String locale, final SharedPreferences sp) { super(context, locale, sp, Dictionary.TYPE_USER_HISTORY, getDictionaryFileName(locale)); } diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 0594ddff0..36afea54b 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ + decaying_utils.cpp \ dict_file_writing_utils.cpp \ format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 85e100e33..c5ef264fc 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -113,10 +113,10 @@ static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dic } static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz, - jlong dict) { + jlong dict, jboolean mindsBlockByGC) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return false; - return dictionary->needsToRunGC(); + return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE); } static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict, @@ -364,7 +364,7 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("needsToRunGCNative"), - const_cast<char *>("(J)Z"), + const_cast<char *>("(JZ)Z"), reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC) }, { diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 89dfa39b3..c2aa8ba0e 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -375,7 +375,7 @@ typedef enum { CT_TERMINAL, CT_TERMINAL_INSERTION, // Create new word with space omission - CT_NEW_WORD_SPACE_OMITTION, + CT_NEW_WORD_SPACE_OMISSION, // Create new word with space substitution CT_NEW_WORD_SPACE_SUBSTITUTION, } CorrectionType; diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 41ef9d2b2..9099e8285 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -38,10 +38,10 @@ INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \ NELEMS(prevWordCharBuf)); \ - AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \ + AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \ getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ - getInputIndex(0)); \ + getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \ } while (0) #else #define LOGI_SHOW_ADD_COST_PROP @@ -434,6 +434,13 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getLanguageDistance(); } + // For space-aware gestures, we store the normalized distance at the char index + // that ends the first word of the suggestion. We call this the distance after + // first word. + float getNormalizedCompoundDistanceAfterFirstWord() const { + return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistanceAfterFirstWord(); + } + float getLanguageDistanceRatePerWordForScoring() const { const float langDist = getLanguageDistanceForScoring(); const float totalWordCount = @@ -565,6 +572,12 @@ class DicNode { inputSize, getTotalInputIndex(), errorType); } + // Saves the current normalized compound distance for space-aware gestures. + // See getNormalizedCompoundDistanceAfterFirstWord for details. + AK_FORCE_INLINE void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() { + mDicNodeState.mDicNodeStateScoring.saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet(); + } + // Caveat: Must not be called outside Weighting // This restriction is guaranteed by "friend" AK_FORCE_INLINE void forwardInputIndex(const int pointerId, const int count, diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index 4c884225a..3c85d0e9d 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -31,7 +31,8 @@ class DicNodeStateScoring { mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX), mEditCorrectionCount(0), mProximityCorrectionCount(0), mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f), - mRawLength(0.0f), mExactMatch(true) { + mRawLength(0.0f), mExactMatch(true), + mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) { } virtual ~DicNodeStateScoring() {} @@ -45,6 +46,7 @@ class DicNodeStateScoring { mRawLength = 0.0f; mDoubleLetterLevel = NOT_A_DOUBLE_LETTER; mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX; + mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING; mExactMatch = true; } @@ -58,6 +60,8 @@ class DicNodeStateScoring { mDoubleLetterLevel = scoring->mDoubleLetterLevel; mDigraphIndex = scoring->mDigraphIndex; mExactMatch = scoring->mExactMatch; + mNormalizedCompoundDistanceAfterFirstWord = + scoring->mNormalizedCompoundDistanceAfterFirstWord; } void addCost(const float spatialCost, const float languageCost, const bool doNormalization, @@ -86,6 +90,17 @@ class DicNodeStateScoring { } } + // Saves the current normalized distance for space-aware gestures. + // See getNormalizedCompoundDistanceAfterFirstWord for details. + void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() { + // We get called here after each word. We only want to store the distance after + // the first word, so if we already have a distance we skip saving -- hence "IfNoneYet" + // in the method name. + if (mNormalizedCompoundDistanceAfterFirstWord >= MAX_VALUE_FOR_WEIGHTING) { + mNormalizedCompoundDistanceAfterFirstWord = getNormalizedCompoundDistance(); + } + } + void addRawLength(const float rawLength) { mRawLength += rawLength; } @@ -102,6 +117,13 @@ class DicNodeStateScoring { return mNormalizedCompoundDistance; } + // For space-aware gestures, we store the normalized distance at the char index + // that ends the first word of the suggestion. We call this the distance after + // first word. + float getNormalizedCompoundDistanceAfterFirstWord() const { + return mNormalizedCompoundDistanceAfterFirstWord; + } + float getSpatialDistance() const { return mSpatialDistance; } @@ -178,6 +200,7 @@ class DicNodeStateScoring { float mLanguageDistance; float mRawLength; bool mExactMatch; + float mNormalizedCompoundDistanceAfterFirstWord; AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance, bool doNormalization, int inputSize, int totalInputIndex) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 214df1bbf..b1d01ed86 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -33,6 +33,8 @@ namespace latinime { +const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; + Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), @@ -121,8 +123,8 @@ void Dictionary::flushWithGC(const char *const filePath) { mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); } -bool Dictionary::needsToRunGC() { - return mDictionaryStructureWithBufferPolicy->needsToRunGC(); +bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { + return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); } void Dictionary::getProperty(const char *const query, char *const outResult, @@ -131,27 +133,27 @@ void Dictionary::getProperty(const char *const query, char *const outResult, } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { - const int BUFFER_SIZE = 16; - int dictionaryIdCodePointBuffer[BUFFER_SIZE]; - int versionStringCodePointBuffer[BUFFER_SIZE]; - int dateStringCodePointBuffer[BUFFER_SIZE]; + int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; const DictionaryHeaderStructurePolicy *const headerPolicy = getDictionaryStructurePolicy()->getHeaderStructurePolicy(); headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, - BUFFER_SIZE); + HEADER_ATTRIBUTE_BUFFER_SIZE); headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, - BUFFER_SIZE); - headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE); - - char dictionaryIdCharBuffer[BUFFER_SIZE]; - char versionStringCharBuffer[BUFFER_SIZE]; - char dateStringCharBuffer[BUFFER_SIZE]; - intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, - dictionaryIdCharBuffer, BUFFER_SIZE); - intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, - versionStringCharBuffer, BUFFER_SIZE); - intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, - dateStringCharBuffer, BUFFER_SIZE); + HEADER_ATTRIBUTE_BUFFER_SIZE); + headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, + HEADER_ATTRIBUTE_BUFFER_SIZE); + + char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, + dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); + intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, + versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); + intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, + dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); LogUtils::logToJava(env, "Dictionary info: dictionary = %s ; version = %s ; date = %s", diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 800751745..d8a0f3e58 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -81,7 +81,7 @@ class Dictionary { void flushWithGC(const char *const filePath); - bool needsToRunGC(); + bool needsToRunGC(const bool mindsBlockByGC); void getProperty(const char *const query, char *const outResult, const int maxResultLength) const; @@ -95,6 +95,8 @@ class Dictionary { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); + static const int HEADER_ATTRIBUTE_BUFFER_SIZE; + DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; const BigramDictionary *const mBigramDictionary; const SuggestInterface *const mGestureSuggest; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 2434287b1..c7ffef0d5 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -78,7 +78,7 @@ class DictionaryStructureWithBufferPolicy { virtual void flushWithGC(const char *const filePath) = 0; - virtual bool needsToRunGC() const = 0; + virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0; virtual void getProperty(const char *const query, char *const outResult, const int maxResultLength) const = 0; diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index f9b777df2..0c4016893 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -38,7 +38,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_SUBSTITUTION: PROF_SUBSTITUTION(node->mProfiler); return; - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: PROF_NEW_WORD(node->mProfiler); return; case CT_MATCH: @@ -93,6 +93,11 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n } dicNode->addCost(spatialCost, languageCost, weighting->needsToNormalizeCompoundDistance(), inputSize, errorType); + if (CT_NEW_WORD_SPACE_OMISSION == correctionType) { + // When we are on a terminal, we save the current distance for evaluating + // when to auto-commit partial suggestions. + dicNode->saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet(); + } } /* static */ float Weighting::getSpatialCost(const Weighting *const weighting, @@ -108,7 +113,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_SUBSTITUTION: // only used for typing return weighting->getSubstitutionCost(); - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG); case CT_MATCH: return weighting->getMatchedCost(traverseSession, dicNode, inputStateG); @@ -138,7 +143,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n return 0.0f; case CT_SUBSTITUTION: return 0.0f; - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: return weighting->getNewWordBigramLanguageCost( traverseSession, parentDicNode, multiBigramMap); case CT_MATCH: @@ -173,7 +178,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n return 0; /* 0 because CT_MATCH will be called */ case CT_SUBSTITUTION: return 0; /* 0 because CT_MATCH will be called */ - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: return 0; case CT_MATCH: return 1; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index b1340e12f..e20bc497a 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -574,7 +574,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode DicNodeUtils::initAsRootWithPreviousWord( traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); const CorrectionType correctionType = spaceSubstitution ? - CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION; + CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION; Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, &newDicNode, traverseSession->getMultiBigramMap()); if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index e02f4cbf1..67a085de3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -17,10 +17,10 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" namespace latinime { @@ -41,9 +41,14 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { originalBigramPos += mBuffer->getOriginalBufferSize(); } - *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); + if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) { + // This bigram is too weak to output. + *outBigramPos = NOT_A_DICT_POS; + } else { + *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + } if (usesAdditionalBuffer) { *bigramEntryPos += mBuffer->getOriginalBufferSize(); } @@ -153,15 +158,21 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( const int bigramTargetNodePos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); - // TODO: Update probability for supporting probability decaying. if (nodeReader.isDeleted() || !nodeReader.isTerminal() || bigramTargetNodePos == NOT_A_DICT_POS) { // The target is no longer valid terminal. Invalidate the current bigram entry. if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, - NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) { + NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) { return false; } - } else { + continue; + } + bool isRemoved = false; + if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos, + &isRemoved)) { + return false; + } + if (!isRemoved) { (*outValidBigramEntryCount) += 1; } } while(BigramListReadWriteUtils::hasNext(bigramFlags)); @@ -247,8 +258,14 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) { // Update this bigram entry. *outAddedNewBigram = false; + const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( + bigramFlags); + const int probabilityToWrite = mIsDecayingDict ? + DecayingUtils::getUpdatedBigramProbabilityDelta( + originalProbability, probability) : probability; const BigramListReadWriteUtils::BigramFlags updatedFlags = - BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability); + BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, + probabilityToWrite); return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos, &entryPos); } @@ -276,8 +293,11 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability, int *const writingPos) { // hasNext is false because we are adding a new bigram entry at the end of the bigram list. + const int probabilityToWrite = mIsDecayingDict ? + DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : + probability; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, - probability, false /* hasNext */, writingPos); + probabilityToWrite, false /* hasNext */, writingPos); } bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) { @@ -339,4 +359,33 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( return currentPos; } +bool DynamicBigramListPolicy::updateProbabilityForDecay( + BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, + int *const bigramEntryPos, bool *const outRemoved) const { + *outRemoved = false; + if (mIsDecayingDict) { + // Update bigram probability for decaying. + const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave( + BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); + if (DecayingUtils::isValidBigram(newProbability)) { + // Write new probability. + const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = + BigramListReadWriteUtils::setProbabilityInFlags( + bigramFlags, newProbability); + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags, + targetPtNodePos, bigramEntryPos)) { + return false; + } + } else { + // Remove current bigram entry. + *outRemoved = true; + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) { + return false; + } + } + } + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 3ebf69946..b358b4ed5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" namespace latinime { @@ -34,8 +35,9 @@ class DictionaryShortcutsStructurePolicy; class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { public: DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, - const DictionaryShortcutsStructurePolicy *const shortcutPolicy) - : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {} + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const bool isDecayingDict) + : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {} ~DynamicBigramListPolicy() {} @@ -74,9 +76,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { BufferWithExtendableBuffer *const mBuffer; const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; + const bool mIsDecayingDict; // Follow bigram link and return the position of bigram target PtNode that is currently valid. int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; + + bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags, + const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index 5eb473300..081163a4d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" + namespace latinime { bool DynamicPatriciaTrieGcEventListeners @@ -25,6 +27,19 @@ bool DynamicPatriciaTrieGcEventListeners // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. bool isUselessPtNode = !node->isTerminal(); + if (node->isTerminal() && mIsDecayingDict) { + const int newProbability = + DecayingUtils::getUnigramProbabilityToSave(node->getProbability()); + int writingPos = node->getProbabilityFieldPos(); + // Update probability. + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( + mBuffer, newProbability, &writingPos)) { + return false; + } + if (!DecayingUtils::isValidUnigram(newProbability)) { + isUselessPtNode = false; + } + } if (mChildrenValue > 0) { isUselessPtNode = false; } else if (node->isTerminal()) { @@ -41,7 +56,7 @@ bool DynamicPatriciaTrieGcEventListeners return false; } } else { - valueStack.back() += 1; + mValueStack.back() += 1; if (node->isTerminal()) { mValidUnigramCount += 1; } @@ -49,6 +64,23 @@ bool DynamicPatriciaTrieGcEventListeners return true; } +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + if (!node->isDeleted()) { + int pos = node->getBigramsPos(); + if (pos != NOT_A_DICT_POS) { + int bigramEntryCount = 0; + if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, + &bigramEntryCount)) { + return false; + } + mValidBigramEntryCount += bigramEntryCount; + } + } + return true; +} + // Writes dummy PtNode array size when the head of PtNode array is read. bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onDescend(const int ptNodeArrayPos) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index aa6e60959..463715af5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -39,23 +39,23 @@ class DynamicPatriciaTrieGcEventListeners { public: TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( DynamicPatriciaTrieWritingHelper *const writingHelper, - BufferWithExtendableBuffer *const buffer) - : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), - mChildrenValue(0), mValidUnigramCount(0) {} + BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) + : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict), + mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; bool onAscend() { - if (valueStack.empty()) { + if (mValueStack.empty()) { return false; } - mChildrenValue = valueStack.back(); - valueStack.pop_back(); + mChildrenValue = mValueStack.back(); + mValueStack.pop_back(); return true; } bool onDescend(const int ptNodeArrayPos) { - valueStack.push_back(0); + mValueStack.push_back(0); return true; } @@ -74,7 +74,8 @@ class DynamicPatriciaTrieGcEventListeners { DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; - std::vector<int> valueStack; + const int mIsDecayingDict; + std::vector<int> mValueStack; int mChildrenValue; int mValidUnigramCount; }; @@ -94,20 +95,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { - if (!node->isDeleted()) { - int pos = node->getBigramsPos(); - if (pos != NOT_A_DICT_POS) { - int bigramEntryCount = 0; - if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, - &bigramEntryCount)) { - return false; - } - mValidBigramEntryCount += bigramEntryCount; - } - } - return true; - } + const int *const nodeCodePoints); int getValidBigramEntryCount() const { return mValidBigramEntryCount; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 4581ec093..0d8c92768 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -18,6 +18,7 @@ #include <cstdio> #include <cstring> +#include <ctime> #include "defines.h" #include "suggest/core/dicnode/dic_node.h" @@ -27,12 +28,17 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; +const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = + DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; +const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -143,14 +149,17 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { - // TODO: check mHeaderPolicy.usesForgettingCurve(); - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return ProbabilityUtils::backoff(unigramProbability); + if (mHeaderPolicy.isDecayingDict()) { + return DecayingUtils::getProbability(unigramProbability, bigramProbability); } else { - return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, - bigramProbability); + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } } } @@ -199,11 +208,16 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewUnigram = false; if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, &addedNewUnigram)) { @@ -222,6 +236,11 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { @@ -233,7 +252,7 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewBigram = false; if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { if (addedNewBigram) { @@ -251,6 +270,11 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); return false; } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { @@ -262,7 +286,7 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { mBigramCount--; return true; @@ -277,7 +301,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { return; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } @@ -287,17 +311,42 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { return; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); } -bool DynamicPatriciaTriePolicy::needsToRunGC() const { +bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { if (!mBuffer->isUpdatable()) { AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; } - // TODO: Implement more properly. - return mBufferWithExtendableBuffer.isNearSizeLimit(); + if (mBufferWithExtendableBuffer.isNearSizeLimit()) { + // Additional buffer size is near the limit. + return true; + } else if (mHeaderPolicy.getExtendedRegionSize() + + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() + > MAX_DICT_EXTENDED_REGION_SIZE) { + // Total extended region size exceeds the limit. + return true; + } else if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS + && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) { + // Needs to reduce dictionary size. + return true; + } else if (mHeaderPolicy.isDecayingDict()) { + if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) { + // Unigram count exceeds the limit. + return true; + } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) { + // Bigram count exceeds the limit. + return true; + } else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime() + + MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) { + // Time to update probabilities for decaying. + return true; + } + } + return false; } void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 7f9d4d924..d3150c6fc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -37,7 +37,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mShortcutListPolicy(&mBufferWithExtendableBuffer), - mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy), + mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy, + mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()) {} @@ -91,7 +92,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { void flushWithGC(const char *const filePath); - bool needsToRunGC() const; + bool needsToRunGC(const bool mindsBlockByGC) const; void getProperty(const char *const query, char *const outResult, const int maxResultLength) const; @@ -101,6 +102,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { static const char*const UNIGRAM_COUNT_QUERY; static const char*const BIGRAM_COUNT_QUERY; + static const int MAX_DICT_EXTENDED_REGION_SIZE; + static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING; const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index bae5e8cad..28124d251 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -25,6 +25,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/hash_map_compat.h" @@ -57,7 +58,9 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; return reallocatePtNodeAndAddNewPtNodes(nodeReader, - readingHelper->getMergedNodeCodePoints(), j, probability, + readingHelper->getMergedNodeCodePoints(), j, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, + probability), wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } @@ -69,7 +72,8 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( } if (!nodeReader->hasChildren()) { *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, probability, + return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), wordCodePoints + readingHelper->getTotalCodePointCount(), codePointCount - readingHelper->getTotalCodePointCount()); } @@ -86,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), - probability, &pos); + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); } bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, @@ -147,7 +151,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + - mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(); + mBuffer->getUsedAdditionalBufferSize(); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, unigramCount, bigramCount, extendedRegionSize)) { return; @@ -351,9 +355,11 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( if (originalPtNode->isTerminal()) { // Overwrites the probability. *outAddedNewUnigram = false; + const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), + probability); int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, - probability, &probabilityFieldPos)) { + probabilityToWrite, &probabilityFieldPos)) { return false; } } else { @@ -365,7 +371,8 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), - probability, &movedPos)) { + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), + &movedPos)) { return false; } } @@ -481,11 +488,15 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - this, mBuffer); + this, mBuffer, mIsDecayingDict); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } + if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more unigrams. + } readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability @@ -495,6 +506,11 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } + if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() + > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more bigrams. + } + // Mapping from positions in mBuffer to positions in bufferToWrite. DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); @@ -508,7 +524,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instance for the GCed dictionary. DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); - DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy); + DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy, + mIsDecayingDict); // Create reading helper for the GCed dictionary. DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); @@ -525,4 +542,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return true; } +int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, + const int newProbability) { + if (mIsDecayingDict) { + return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability); + } else { + return newProbability; + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 827b6097f..ecee2cdbf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -47,10 +47,13 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); }; + static const size_t MAX_DICTIONARY_SIZE; + DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy *const bigramPolicy, - DynamicShortcutListPolicy *const shortcutPolicy) - : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict) + : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + mIsDecayingDict(isDecayingDict) {} ~DynamicPatriciaTrieWritingHelper() {} @@ -87,11 +90,11 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; - static const size_t MAX_DICTIONARY_SIZE; BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; + const bool mIsDecayingDict; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos, const int bigramLinkedNodePos); @@ -127,6 +130,8 @@ class DynamicPatriciaTrieWritingHelper { bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount); + + int getUpdatedProbability(const int originalProbability, const int newProbability); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 78c6c042f..9ce9994dd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -20,7 +20,8 @@ namespace latinime { // Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; -const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; +// TODO: Change attribute string to "IS_DECAYING_DICT". +const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 93b9c6fcb..4261667fa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -36,8 +36,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), - mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, - USES_FORGETTING_CURVE_KEY, false /* defaultValue */)), + mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, + IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, @@ -54,8 +54,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( attributeMap)), mSize(0), mAttributeMap(*attributeMap), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), - mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, - USES_FORGETTING_CURVE_KEY, false /* defaultValue */)), + mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, + IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} @@ -82,8 +82,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMultiWordCostMultiplier; } - AK_FORCE_INLINE bool usesForgettingCurve() const { - return mUsesForgettingCurve; + AK_FORCE_INLINE bool isDecayingDict() const { + return mIsDecayingDict; } AK_FORCE_INLINE int getLastUpdatedTime() const { @@ -113,7 +113,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; - static const char *const USES_FORGETTING_CURVE_KEY; + static const char *const IS_DECAYING_DICT_KEY; static const char *const LAST_UPDATED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; @@ -126,7 +126,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mSize; HeaderReadWriteUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; - const bool mUsesForgettingCurve; + const bool mIsDecayingDict; const int mLastUpdatedTime; const int mUnigramCount; const int mBigramCount; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 4277ff5d7..8d88c68e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -107,7 +107,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); } - bool needsToRunGC() const { + bool needsToRunGC(const bool mindsBlockByGC) const { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 17d2e39c2..9dc34823c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -42,6 +42,10 @@ class BufferWithExtendableBuffer { return mOriginalBufferSize + mUsedAdditionalBufferSize; } + AK_FORCE_INLINE int getUsedAdditionalBufferSize() const { + return mUsedAdditionalBufferSize; + } + /** * For reading. */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp new file mode 100644 index 000000000..942a74238 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" + +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + +namespace latinime { + +const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000; +const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; +const int DecayingUtils::MAX_BIGRAM_COUNT = 12000; +const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; + +const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127; +const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120; +const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; +const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8; +const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; +const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; +const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; + +/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability, + const int encodedBigramProbabilityDelta) { + if (encodedUnigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) { + const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability( + encodedUnigramProbability)); + return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); + } else { + const int rawProbability = ProbabilityUtils::computeProbabilityForBigram( + decodeUnigramProbability(encodedUnigramProbability), + decodeBigramProbabilityDelta(encodedBigramProbabilityDelta)); + return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); + } +} + +/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability, + const int newProbability) { + if (originalEncodedProbability == NOT_A_PROBABILITY) { + // The unigram is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The unigram is not in other dictionaries. + return 0; + } else { + return MIN_VALID_UNIGRAM_PROBABILITY; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) { + return MIN_VALID_UNIGRAM_PROBABILITY; + } + return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY); + } +} + +/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) { + return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); +} + +/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) { + return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); +} + +/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta( + const int originalEncodedProbabilityDelta, const int newProbability) { + if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { + // The bigram relation is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The bigram target is not in other dictionaries. + return 0; + } else { + return MIN_VALID_BIGRAM_PROBABILITY_DELTA; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) { + return MIN_VALID_BIGRAM_PROBABILITY_DELTA; + } + return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP, + MAX_BIGRAM_PROBABILITY_DELTA); + } +} + +/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) { + return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; +} + +/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) { + return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; +} + +/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) { + const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; + if (probability < 0) { + return NOT_A_PROBABILITY; + } else { + return min(probability, MAX_UNIGRAM_PROBABILITY); + } +} + +/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) { + const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; + if (probabilityDelta < 0) { + return NOT_A_PROBABILITY; + } else { + return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA); + } +} + +/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) { + return rawProbability; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h new file mode 100644 index 000000000..1ca03918f --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DECAYING_UTILS_H +#define LATINIME_DECAYING_UTILS_H + +#include "defines.h" + +namespace latinime { + +// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is +// required to introduced to each terminal PtNode and bigram entry. +// TODO: Quit using bigram probability to indicate the delta. +// TODO: Quit using bigram probability delta. +class DecayingUtils { + public: + static const int MAX_UNIGRAM_COUNT; + static const int MAX_UNIGRAM_COUNT_AFTER_GC; + static const int MAX_BIGRAM_COUNT; + static const int MAX_BIGRAM_COUNT_AFTER_GC; + + static int getProbability(const int encodedUnigramProbability, + const int encodedBigramProbabilityDelta); + + static int getUpdatedUnigramProbability(const int originalEncodedProbability, + const int newProbability); + + static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta, + const int newProbability); + + static int isValidUnigram(const int encodedUnigramProbability); + + static int isValidBigram(const int encodedProbabilityDelta); + + static int getUnigramProbabilityToSave(const int encodedProbability); + + static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils); + + static const int MAX_COMPUTED_PROBABILITY; + static const int MAX_UNIGRAM_PROBABILITY; + static const int MIN_VALID_UNIGRAM_PROBABILITY; + static const int UNIGRAM_PROBABILITY_STEP; + static const int MAX_BIGRAM_PROBABILITY_DELTA; + static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA; + static const int BIGRAM_PROBABILITY_DELTA_STEP; + + static int decodeUnigramProbability(const int encodedProbability); + + static int decodeBigramProbabilityDelta(const int encodedProbability); + + static int getDecayedProbability(const int rawProbability); +}; +} // namespace latinime +#endif /* LATINIME_DECAYING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 4fae91936..f22e94c6a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -96,7 +96,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = fclose(file); return false; } - const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); + const int additionalBufSize = buffer->getUsedAdditionalBufferSize(); if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), additionalBufSize, 1, file) < 1) { fclose(file); diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp index 408b12ae9..5b6b5e874 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp @@ -47,7 +47,7 @@ ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType, case CT_TERMINAL_INSERTION: case CT_TRANSPOSITION: return ET_EDIT_CORRECTION; - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: case CT_NEW_WORD_SPACE_SUBSTITUTION: return ET_NEW_WORD; case CT_TERMINAL: diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java new file mode 100644 index 000000000..cf85d97a0 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.LargeTest; + +import com.android.inputmethod.latin.makedict.FormatSpec; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +@LargeTest +public class BinaryDictionaryDecayingTests extends AndroidTestCase { + private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; + private static final String TEST_LOCALE = "test"; + + private static final int DUMMY_PROBABILITY = 0; + + @Override + protected void setUp() throws Exception { + super.setUp(); + } + + @Override + protected void tearDown() throws Exception { + super.tearDown(); + } + + private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { + binaryDictionary.flushWithGC(); + } + + private void forcePassingLongTime(final BinaryDictionary binaryDictionary) { + // Currently, probabilities are decayed when GC is run. All entries that have never been + // typed in 32 GCs are removed. + final int count = 32; + for (int i = 0; i < count; i++) { + binaryDictionary.flushWithGC(); + } + } + + private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { + final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, + getContext().getCacheDir()); + Map<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + 3 /* dictVersion */, attributeMap)) { + return file; + } else { + throw new IOException("Empty dictionary cannot be created."); + } + } + + public void testAddValidAndInvalidWords() { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidWord("a")); + binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidWord("a")); + binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidWord("a")); + binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY); + assertTrue(binaryDictionary.isValidWord("a")); + + binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); + assertTrue(binaryDictionary.isValidWord("b")); + + final int unigramProbability = binaryDictionary.getFrequency("a"); + binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidBigram("a", "b")); + binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidBigram("a", "b")); + binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); + assertFalse(binaryDictionary.isValidBigram("a", "b")); + binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY); + assertTrue(binaryDictionary.isValidBigram("a", "b")); + + binaryDictionary.addUnigramWord("c", DUMMY_PROBABILITY); + binaryDictionary.addBigramWords("a", "c", DUMMY_PROBABILITY); + assertTrue(binaryDictionary.isValidBigram("a", "c")); + + binaryDictionary.close(); + dictFile.delete(); + } + + // TODO: Add large tests. + public void testDecayingProbability() { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + assertTrue(binaryDictionary.isValidWord("a")); + forcePassingShortTime(binaryDictionary); + assertFalse(binaryDictionary.isValidWord("a")); + + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + forcePassingShortTime(binaryDictionary); + assertTrue(binaryDictionary.isValidWord("a")); + forcePassingLongTime(binaryDictionary); + assertFalse(binaryDictionary.isValidWord("a")); + + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); + binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); + assertTrue(binaryDictionary.isValidBigram("a", "b")); + forcePassingShortTime(binaryDictionary); + assertFalse(binaryDictionary.isValidBigram("a", "b")); + + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); + binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); + binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); + binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY); + binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY); + binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY); + assertTrue(binaryDictionary.isValidBigram("a", "b")); + forcePassingShortTime(binaryDictionary); + assertTrue(binaryDictionary.isValidBigram("a", "b")); + forcePassingLongTime(binaryDictionary); + assertFalse(binaryDictionary.isValidBigram("a", "b")); + + binaryDictionary.close(); + dictFile.delete(); + } +} diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 826c0f7b2..6a21522f9 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -606,7 +606,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - while(!binaryDictionary.needsToRunGC()) { + while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { final String word = CodePointUtils.generateWord(random, codePointSet); words.add(word); final int unigramProbability = random.nextInt(0xFF); diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java index 06c427193..ddc9546c5 100644 --- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java @@ -75,10 +75,10 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { return new ArrayList<String>(wordSet); } - private void addToDict(final UserHistoryPredictionDictionary dict, final List<String> words) { + private void addToDict(final UserHistoryDictionary dict, final List<String> words) { String prevWord = null; for (String word : words) { - dict.addToPersonalizationPredictionDictionary(prevWord, word, true); + dict.addToDictionary(prevWord, word, true); prevWord = word; } } @@ -90,8 +90,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { private void addAndWriteRandomWords(final String testFilenameSuffix, final int numberOfWords, final Random random, final boolean checkContents) { final List<String> words = generateWords(numberOfWords, random); - final UserHistoryPredictionDictionary dict = - PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(), + final UserHistoryDictionary dict = + PersonalizationHelper.getUserHistoryDictionary(getContext(), testFilenameSuffix /* locale */, mPrefs); // Add random words to the user history dictionary. addToDict(dict, words); @@ -122,8 +122,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { true /* checksContents */); } finally { try { - final UserHistoryPredictionDictionary dict = - PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(), + final UserHistoryDictionary dict = + PersonalizationHelper.getUserHistoryDictionary(getContext(), testFilenameSuffix, mPrefs); Log.d(TAG, "waiting for writing ..."); dict.shutdownExecutorForTests(); @@ -134,7 +134,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { Log.d(TAG, "InterruptedException: " + e); } - final String fileName = UserHistoryPredictionDictionary.NAME + "." + testFilenameSuffix + final String fileName = UserHistoryDictionary.NAME + "." + testFilenameSuffix + ExpandableBinaryDictionary.DICT_FILE_EXTENSION; dictFile = new File(getContext().getFilesDir(), fileName); @@ -159,7 +159,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { // Create filename suffixes for this test. for (int i = 0; i < numberOfLanguages; i++) { testFilenameSuffixes[i] = "testSwitchingLanguages" + i; - final String fileName = UserHistoryPredictionDictionary.NAME + "." + + final String fileName = UserHistoryDictionary.NAME + "." + testFilenameSuffixes[i] + ExpandableBinaryDictionary.DICT_FILE_EXTENSION; dictFiles[i] = new File(getContext().getFilesDir(), fileName); } @@ -181,8 +181,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { try { Log.d(TAG, "waiting for writing ..."); for (int i = 0; i < numberOfLanguages; i++) { - final UserHistoryPredictionDictionary dict = - PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(), + final UserHistoryDictionary dict = + PersonalizationHelper.getUserHistoryDictionary(getContext(), testFilenameSuffixes[i], mPrefs); dict.shutdownExecutorForTests(); while (!dict.isTerminatedForTests()) { @@ -210,8 +210,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { 10000 : 1000; final Random random = new Random(123456); - UserHistoryPredictionDictionary dict = - PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(), + UserHistoryDictionary dict = + PersonalizationHelper.getUserHistoryDictionary(getContext(), testFilenameSuffix, mPrefs); try { addAndWriteRandomWords(testFilenameSuffix, numberOfWords, random, @@ -227,7 +227,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { } catch (InterruptedException e) { Log.d(TAG, "InterruptedException: ", e); } - final String fileName = UserHistoryPredictionDictionary.NAME + "." + testFilenameSuffix + final String fileName = UserHistoryDictionary.NAME + "." + testFilenameSuffix + ExpandableBinaryDictionary.DICT_FILE_EXTENSION; dictFile = new File(getContext().getFilesDir(), fileName); if (dictFile != null) { diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index 6c4cbcf9d..bd06e9f3a 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -80,17 +80,17 @@ public final class BinaryDictOffdeviceUtils { } /** - * Returns a decrypted/uncompressed binary dictionary. + * Returns a decrypted/uncompressed dictionary. * - * This will decrypt/uncompress any number of times as necessary until it finds the binary + * This will decrypt/uncompress any number of times as necessary until it finds the * dictionary signature, and copy the decoded file to a temporary place. - * If this is not a binary dictionary, the method returns null. + * If this is not a dictionary, the method returns null. */ - public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) { - return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src, 0); + public static DecoderChainSpec getRawDictionaryOrNull(final File src) { + return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0); } - private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal( + private static DecoderChainSpec getRawDictionaryOrNullInternal( final DecoderChainSpec spec, final File src, final int depth) { // Unfortunately the decoding scheme we use can consider any data to be encrypted // and will product some output, meaning it's not possible to reliably detect encrypted @@ -98,7 +98,8 @@ public final class BinaryDictOffdeviceUtils { // over and over, ending in a stack overflow. Hence we limit the depth at which we try // decoding the file. if (depth > MAX_DECODE_DEPTH) return null; - if (BinaryDictDecoderUtils.isBinaryDictionary(src)) { + if (BinaryDictDecoderUtils.isBinaryDictionary(src) + || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) { spec.mFile = src; return spec; } @@ -106,7 +107,7 @@ public final class BinaryDictOffdeviceUtils { final File uncompressedFile = tryGetUncompressedFile(src); if (null != uncompressedFile) { final DecoderChainSpec newSpec = - getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile, depth + 1); + getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1); if (null == newSpec) return null; return newSpec.addStep(COMPRESSION); } @@ -114,7 +115,7 @@ public final class BinaryDictOffdeviceUtils { final File decryptedFile = tryGetDecryptedFile(src); if (null != decryptedFile) { final DecoderChainSpec newSpec = - getRawBinaryDictionaryOrNullInternal(spec, decryptedFile, depth + 1); + getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1); if (null == newSpec) return null; return newSpec.addStep(ENCRYPTION); } @@ -175,15 +176,16 @@ public final class BinaryDictOffdeviceUtils { return XmlDictInputOutput.readDictionaryXml( new BufferedInputStream(new FileInputStream(file)), null /* shortcuts */, null /* bigrams */); - } else if (CombinedInputOutput.isCombinedDictionary(filename)) { - if (report) System.out.println("Format : Combined format"); - return CombinedInputOutput.readDictionaryCombined( - new BufferedInputStream(new FileInputStream(file))); } else { - final DecoderChainSpec decodedSpec = getRawBinaryDictionaryOrNull(file); + final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); if (null == decodedSpec) { crash(filename, new RuntimeException( filename + " does not seem to be a dictionary file")); + } else if (CombinedInputOutput.isCombinedDictionary( + decodedSpec.mFile.getAbsolutePath())){ + if (report) System.out.println("Format : Combined format"); + return CombinedInputOutput.readDictionaryCombined( + new BufferedInputStream(new FileInputStream(decodedSpec.mFile))); } else { final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile, DictDecoder.USE_BYTEARRAY); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java index 9274dcd2e..dff3387be 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java @@ -79,7 +79,7 @@ public class Package { throw new RuntimeException("Too many/too few arguments for command " + COMMAND); } final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec = - BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(new File(mArgs[0])); + BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0])); if (null == decodedSpec) { System.out.println(mArgs[0] + " does not seem to be a dictionary"); return; diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index 1eff497c1..1baeb7a47 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -64,7 +64,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { // Test for an actually compressed dictionary and its contents final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec = - BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst); + BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst); for (final String step : decodeSpec.mDecoderSpec) { assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step); } @@ -90,7 +90,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { // Test that a random data file actually fails assertNull("Wrongly identified data file", - BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst)); + BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst)); final File gzDst = File.createTempFile("testGetRawDict", ".tmp"); gzDst.deleteOnExit(); @@ -103,6 +103,6 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { // Test that a compressed random data file actually fails assertNull("Wrongly identified data file", - BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst)); + BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst)); } } |