diff options
50 files changed, 1006 insertions, 348 deletions
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz Binary files differindex d28ef485b..afef676b2 100644 --- a/dictionaries/en_GB_wordlist.combined.gz +++ b/dictionaries/en_GB_wordlist.combined.gz diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz Binary files differindex b656f880d..eafbc9d30 100644 --- a/dictionaries/en_US_wordlist.combined.gz +++ b/dictionaries/en_US_wordlist.combined.gz diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz Binary files differindex 8aa40e944..9cbca0b41 100644 --- a/dictionaries/en_wordlist.combined.gz +++ b/dictionaries/en_wordlist.combined.gz diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz Binary files differindex 1d988d6fe..1815e4732 100644 --- a/dictionaries/fr_wordlist.combined.gz +++ b/dictionaries/fr_wordlist.combined.gz diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz Binary files differindex 221ea7508..876eb71c2 100644 --- a/dictionaries/pt_BR_wordlist.combined.gz +++ b/dictionaries/pt_BR_wordlist.combined.gz diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz Binary files differindex 6a041d9cf..406869059 100644 --- a/dictionaries/pt_PT_wordlist.combined.gz +++ b/dictionaries/pt_PT_wordlist.combined.gz diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict Binary files differindex 6564d47fa..8660c28e2 100644 --- a/java/res/raw/main_en.dict +++ b/java/res/raw/main_en.dict diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict Binary files differindex 10adad092..0d2e51837 100644 --- a/java/res/raw/main_fr.dict +++ b/java/res/raw/main_fr.dict diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict Binary files differindex f9ae9b561..98a27c7c8 100644 --- a/java/res/raw/main_pt_br.dict +++ b/java/res/raw/main_pt_br.dict diff --git a/java/res/values-kk/strings-appname.xml b/java/res/values-kk/strings-appname.xml new file mode 100644 index 000000000..1e201fa9e --- /dev/null +++ b/java/res/values-kk/strings-appname.xml @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +/* +** +** Copyright 2013, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + --> + +<resources xmlns:android="http://schemas.android.com/apk/res/android" + xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <string name="english_ime_name" msgid="5940510615957428904">"Android пернетақтасы (AOSP)"</string> + <string name="spell_checker_service_name" msgid="1254221805440242662">"Android емлені тексеру құралы (AOSP)"</string> + <string name="english_ime_settings" msgid="5760361067176802794">"Android пернетақтасының параметрлері (AOSP)"</string> + <string name="android_spell_checker_settings" msgid="6123949487832861885">"Android емлені тексеру құралының параметрлері (AOSP)"</string> +</resources> diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index fdde98da1..6ec7aeec3 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -31,6 +31,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Locale; +import java.util.Map; /** * Implements a static, compacted, binary dictionary of standard words. @@ -47,6 +48,11 @@ public final class BinaryDictionary extends Dictionary { // TODO: Remove this heuristic. private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3; + @UsedForTesting + public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; + @UsedForTesting + public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; + private long mNativeDict; private final Locale mLocale; private final long mDictSize; @@ -104,6 +110,8 @@ public final class BinaryDictionary extends Dictionary { JniUtils.loadNativeLibrary(); } + private static native boolean createEmptyDictFileNative(String filePath, long dictVersion, + String[] attributeKeyStringArray, String[] attributeValueStringArray); private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); private static native void flushNative(long dict, String filePath); @@ -126,6 +134,21 @@ public final class BinaryDictionary extends Dictionary { private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); private static native int calculateProbabilityNative(long dict, int unigramProbability, int bigramProbability); + private static native String getPropertyNative(long dict, String query); + + @UsedForTesting + public static boolean createEmptyDictFile(final String filePath, final long dictVersion, + final Map<String, String> attributeMap) { + final String[] keyArray = new String[attributeMap.size()]; + final String[] valueArray = new String[attributeMap.size()]; + int index = 0; + for (final String key : attributeMap.keySet()) { + keyArray[index] = key; + valueArray[index] = attributeMap.get(key); + index++; + } + return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray); + } // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, @@ -314,6 +337,12 @@ public final class BinaryDictionary extends Dictionary { return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability); } + @UsedForTesting + public String getPropertyForTests(String query) { + if (!isValidDictionary()) return ""; + return getPropertyNative(mNativeDict, query); + } + @Override public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { // TODO: actually use the confidence rather than use this completely broken heuristic diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 0774ce203..d3da068bd 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -22,12 +22,7 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import com.android.inputmethod.latin.personalization.DynamicPersonalizationDictionaryWriter; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.utils.AsyncResultHolder; @@ -35,10 +30,11 @@ import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor; import java.io.File; -import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; /** @@ -68,18 +64,21 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { */ protected static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(3 /* version */, true /* supportsDynamicUpdate */); + private static final int DICTIONARY_FORMAT_VERSION = 3; + + private static final String SUPPORTS_DYNAMIC_UPDATE = + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE; /** - * A static map of time recorders, each of which records the time of accesses to a single binary - * dictionary file. The key for this map is the filename and the value is the shared dictionary - * time recorder associated with that filename. + * A static map of update controllers, each of which records the time of accesses to a single + * binary dictionary file and tracks whether the file is regenerating. The key for this map is + * the filename and the value is the shared dictionary time recorder associated with that + * filename. */ - private static volatile ConcurrentHashMap<String, DictionaryTimeRecorder> - sFilenameDictionaryTimeRecorderMap = CollectionUtils.newConcurrentHashMap(); + private static final ConcurrentHashMap<String, DictionaryUpdateController> + sFilenameDictionaryUpdateControllerMap = CollectionUtils.newConcurrentHashMap(); - private static volatile ConcurrentHashMap<String, PrioritizedSerialExecutor> + private static final ConcurrentHashMap<String, PrioritizedSerialExecutor> sFilenameExecutorMap = CollectionUtils.newConcurrentHashMap(); /** The application context. */ @@ -106,13 +105,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private final boolean mIsUpdatable; // TODO: remove, once dynamic operations is serialized - /** Records access to the shared binary dictionary file across multiple instances. */ - private final DictionaryTimeRecorder mFilenameDictionaryTimeRecorder; + /** Controls updating the shared binary dictionary file across multiple instances. */ + private final DictionaryUpdateController mFilenameDictionaryUpdateController; // TODO: remove, once dynamic operations is serialized - /** Records access to the local binary dictionary for this instance. */ - private final DictionaryTimeRecorder mPerInstanceDictionaryTimeRecorder = - new DictionaryTimeRecorder(); + /** Controls updating the local binary dictionary for this instance. */ + private final DictionaryUpdateController mPerInstanceDictionaryUpdateController = + new DictionaryUpdateController(); /* A extension for a binary dictionary file. */ public static final String DICT_FILE_EXTENSION = ".dict"; @@ -134,15 +133,15 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { protected abstract boolean hasContentChanged(); /** - * Gets the dictionary time recorder for the given filename. + * Gets the dictionary update controller for the given filename. */ - private static DictionaryTimeRecorder getDictionaryTimeRecorder( + private static DictionaryUpdateController getDictionaryUpdateController( String filename) { - DictionaryTimeRecorder recorder = sFilenameDictionaryTimeRecorderMap.get(filename); + DictionaryUpdateController recorder = sFilenameDictionaryUpdateControllerMap.get(filename); if (recorder == null) { - synchronized(sFilenameDictionaryTimeRecorderMap) { - recorder = new DictionaryTimeRecorder(); - sFilenameDictionaryTimeRecorderMap.put(filename, recorder); + synchronized(sFilenameDictionaryUpdateControllerMap) { + recorder = new DictionaryUpdateController(); + sFilenameDictionaryUpdateControllerMap.put(filename, recorder); } } return recorder; @@ -192,7 +191,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { mContext = context; mIsUpdatable = isUpdatable; mBinaryDictionary = null; - mFilenameDictionaryTimeRecorder = getDictionaryTimeRecorder(filename); + mFilenameDictionaryUpdateController = getDictionaryUpdateController(filename); // Currently, only dynamic personalization dictionary is updatable. mDictionaryWriter = getDictionaryWriter(context, dictType, isUpdatable); } @@ -233,6 +232,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { }); } + protected Map<String, String> getHeaderAttributeMap() { + HashMap<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + SUPPORTS_DYNAMIC_UPDATE); + return attributeMap; + } + protected void clear() { getExecutor(mFilename).execute(new Runnable() { @Override @@ -240,17 +246,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE && mDictionaryWriter == null) { mBinaryDictionary.close(); final File file = new File(mContext.getFilesDir(), mFilename); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), - false, false)); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - try { - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - } catch (IOException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } + BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); } else { mDictionaryWriter.clear(); } @@ -352,6 +349,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, final int sessionId) { reloadDictionaryIfRequired(); + if (isRegenerating()) { + return null; + } final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); final AsyncResultHolder<ArrayList<SuggestedWordInfo>> holder = new AsyncResultHolder<ArrayList<SuggestedWordInfo>>(); @@ -412,6 +412,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } protected boolean isValidWordInner(final String word) { + if (isRegenerating()) { + return false; + } final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>(); getExecutor(mFilename).executePrioritized(new Runnable() { @Override @@ -437,7 +440,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * dictionary exists, this method will generate one. */ protected void loadDictionary() { - mPerInstanceDictionaryTimeRecorder.mLastUpdateRequestTime = SystemClock.uptimeMillis(); + mPerInstanceDictionaryUpdateController.mLastUpdateRequestTime = SystemClock.uptimeMillis(); reloadDictionaryIfRequired(); } @@ -448,8 +451,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private void loadBinaryDictionary() { if (DEBUG) { Log.d(TAG, "Loading binary dictionary: " + mFilename + " request=" - + mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime + " update=" - + mFilenameDictionaryTimeRecorder.mLastUpdateTime); + + mFilenameDictionaryUpdateController.mLastUpdateRequestTime + " update=" + + mFilenameDictionaryUpdateController.mLastUpdateTime); } final File file = new File(mContext.getFilesDir(), mFilename); @@ -487,8 +490,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private void writeBinaryDictionary() { if (DEBUG) { Log.d(TAG, "Generating binary dictionary: " + mFilename + " request=" - + mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime + " update=" - + mFilenameDictionaryTimeRecorder.mLastUpdateTime); + + mFilenameDictionaryUpdateController.mLastUpdateRequestTime + " update=" + + mFilenameDictionaryUpdateController.mLastUpdateTime); } if (needsToReloadBeforeWriting()) { mDictionaryWriter.clear(); @@ -498,17 +501,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { if (mBinaryDictionary == null || !mBinaryDictionary.isValidDictionary()) { final File file = new File(mContext.getFilesDir(), mFilename); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), - false, false)); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - try { - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - } catch (IOException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } + BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); } else { if (mBinaryDictionary.needsToRunGC()) { mBinaryDictionary.flushWithGC(); @@ -531,11 +525,11 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { */ protected void setRequiresReload(final boolean requiresRebuild) { final long time = SystemClock.uptimeMillis(); - mPerInstanceDictionaryTimeRecorder.mLastUpdateRequestTime = time; - mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime = time; + mPerInstanceDictionaryUpdateController.mLastUpdateRequestTime = time; + mFilenameDictionaryUpdateController.mLastUpdateRequestTime = time; if (DEBUG) { Log.d(TAG, "Reload request: " + mFilename + ": request=" + time + " update=" - + mFilenameDictionaryTimeRecorder.mLastUpdateTime); + + mFilenameDictionaryUpdateController.mLastUpdateTime); } } @@ -544,14 +538,26 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { */ public final void reloadDictionaryIfRequired() { if (!isReloadRequired()) return; - reloadDictionary(); + if (setIsRegeneratingIfNotRegenerating()) { + reloadDictionary(); + } } /** * Returns whether a dictionary reload is required. */ private boolean isReloadRequired() { - return mBinaryDictionary == null || mPerInstanceDictionaryTimeRecorder.isOutOfDate(); + return mBinaryDictionary == null || mPerInstanceDictionaryUpdateController.isOutOfDate(); + } + + private boolean isRegenerating() { + return mFilenameDictionaryUpdateController.mIsRegenerating.get(); + } + + // Returns whether the dictionary can be regenerated. + private boolean setIsRegeneratingIfNotRegenerating() { + return mFilenameDictionaryUpdateController.mIsRegenerating.compareAndSet( + false /* expect */ , true /* update */); } /** @@ -564,39 +570,44 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { getExecutor(mFilename).execute(new Runnable() { @Override public void run() { - final long time = SystemClock.uptimeMillis(); - final boolean dictionaryFileExists = dictionaryFileExists(); - if (mFilenameDictionaryTimeRecorder.isOutOfDate() || !dictionaryFileExists) { - // If the shared dictionary file does not exist or is out of date, the first - // instance that acquires the lock will generate a new one. - if (hasContentChanged() || !dictionaryFileExists) { - // If the source content has changed or the dictionary does not exist, - // rebuild the binary dictionary. Empty dictionaries are supported (in the - // case where loadDictionaryAsync() adds nothing) in order to provide a - // uniform framework. - mFilenameDictionaryTimeRecorder.mLastUpdateTime = time; + try { + final long time = SystemClock.uptimeMillis(); + final boolean dictionaryFileExists = dictionaryFileExists(); + if (mFilenameDictionaryUpdateController.isOutOfDate() + || !dictionaryFileExists) { + // If the shared dictionary file does not exist or is out of date, the + // first instance that acquires the lock will generate a new one. + if (hasContentChanged() || !dictionaryFileExists) { + // If the source content has changed or the dictionary does not exist, + // rebuild the binary dictionary. Empty dictionaries are supported (in + // the case where loadDictionaryAsync() adds nothing) in order to + // provide a uniform framework. + mFilenameDictionaryUpdateController.mLastUpdateTime = time; + writeBinaryDictionary(); + loadBinaryDictionary(); + } else { + // If not, the reload request was unnecessary so revert + // LastUpdateRequestTime to LastUpdateTime. + mFilenameDictionaryUpdateController.mLastUpdateRequestTime = + mFilenameDictionaryUpdateController.mLastUpdateTime; + } + } else if (mBinaryDictionary == null || + mPerInstanceDictionaryUpdateController.mLastUpdateTime + < mFilenameDictionaryUpdateController.mLastUpdateTime) { + // Otherwise, if the local dictionary is older than the shared dictionary, + // load the shared dictionary. + loadBinaryDictionary(); + } + if (mBinaryDictionary != null && !mBinaryDictionary.isValidDictionary()) { + // Binary dictionary is not valid. Regenerate the dictionary file. + mFilenameDictionaryUpdateController.mLastUpdateTime = time; writeBinaryDictionary(); loadBinaryDictionary(); - } else { - // If not, the reload request was unnecessary so revert - // LastUpdateRequestTime to LastUpdateTime. - mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime = - mFilenameDictionaryTimeRecorder.mLastUpdateTime; } - } else if (mBinaryDictionary == null || - mPerInstanceDictionaryTimeRecorder.mLastUpdateTime - < mFilenameDictionaryTimeRecorder.mLastUpdateTime) { - // Otherwise, if the local dictionary is older than the shared dictionary, load - // the shared dictionary. - loadBinaryDictionary(); - } - if (mBinaryDictionary != null && !mBinaryDictionary.isValidDictionary()) { - // Binary dictionary is not valid. Regenerate the dictionary file. - mFilenameDictionaryTimeRecorder.mLastUpdateTime = time; - writeBinaryDictionary(); - loadBinaryDictionary(); + mPerInstanceDictionaryUpdateController.mLastUpdateTime = time; + } finally { + mFilenameDictionaryUpdateController.mIsRegenerating.set(false); } - mPerInstanceDictionaryTimeRecorder.mLastUpdateTime = time; } }); } @@ -636,59 +647,19 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } /** - * Time recorder for tracking whether the dictionary is out of date. + * For tracking whether the dictionary is out of date and the dictionary is regenerating. * Can be shared across multiple dictionary instances that access the same filename. */ - private static class DictionaryTimeRecorder { - private volatile long mLastUpdateTime = 0; - private volatile long mLastUpdateRequestTime = 0; + private static class DictionaryUpdateController { + public volatile long mLastUpdateTime = 0; + public volatile long mLastUpdateRequestTime = 0; + public volatile AtomicBoolean mIsRegenerating = new AtomicBoolean(); - private boolean isOutOfDate() { + public boolean isOutOfDate() { return (mLastUpdateRequestTime > mLastUpdateTime); } } - /** - * Dynamically adds a word unigram to the dictionary for testing with blocking-lock. - */ - @UsedForTesting - protected void addWordDynamicallyForTests(final String word, final String shortcutTarget, - final int frequency, final boolean isNotAWord) { - getExecutor(mFilename).executePrioritized(new Runnable() { - @Override - public void run() { - addWordDynamically(word, shortcutTarget, frequency, isNotAWord); - } - }); - } - - /** - * Dynamically adds a word bigram in the dictionary for testing with blocking-lock. - */ - @UsedForTesting - protected void addBigramDynamicallyForTests(final String word0, final String word1, - final int frequency, final boolean isValid) { - getExecutor(mFilename).executePrioritized(new Runnable() { - @Override - public void run() { - addBigramDynamically(word0, word1, frequency, isValid); - } - }); - } - - /** - * Dynamically remove a word bigram in the dictionary for testing with blocking-lock. - */ - @UsedForTesting - protected void removeBigramDynamicallyForTests(final String word0, final String word1) { - getExecutor(mFilename).executePrioritized(new Runnable() { - @Override - public void run() { - removeBigramDynamically(word0, word1); - } - }); - } - // TODO: Implement native binary methods once the dynamic dictionary implementation is done. @UsedForTesting public boolean isInDictionaryForTests(final String word) { @@ -701,7 +672,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { holder.set(mBinaryDictionary.isValidWord(word)); } else { holder.set(((DynamicPersonalizationDictionaryWriter) mDictionaryWriter) - .isInDictionaryForTests(word)); + .isInBigramListForTests(word)); } } } diff --git a/java/src/com/android/inputmethod/latin/InputAttributes.java b/java/src/com/android/inputmethod/latin/InputAttributes.java index 21b103e5a..8caf6f17f 100644 --- a/java/src/com/android/inputmethod/latin/InputAttributes.java +++ b/java/src/com/android/inputmethod/latin/InputAttributes.java @@ -103,6 +103,10 @@ public final class InputAttributes { } } + public boolean isTypeNull() { + return InputType.TYPE_NULL == mInputType; + } + public boolean isSameInputType(final EditorInfo editorInfo) { return editorInfo.inputType == mInputType; } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 270dc4c06..5fbbc1764 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1521,7 +1521,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mSubtypeState.switchSubtype(token, mRichImm); } - private void sendDownUpKeyEventForBackwardCompatibility(final int code) { + private void sendDownUpKeyEvent(final int code) { final long eventTime = SystemClock.uptimeMillis(); mConnection.sendKeyEvent(new KeyEvent(eventTime, eventTime, KeyEvent.ACTION_DOWN, code, 0, 0, KeyCharacterMap.VIRTUAL_KEYBOARD, 0, @@ -1538,7 +1538,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // TODO: Remove this special handling of digit letters. // For backward compatibility. See {@link InputMethodService#sendKeyChar(char)}. if (code >= '0' && code <= '9') { - sendDownUpKeyEventForBackwardCompatibility(code - '0' + KeyEvent.KEYCODE_0); + sendDownUpKeyEvent(code - '0' + KeyEvent.KEYCODE_0); return; } @@ -1547,7 +1547,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // a hardware keyboard event on pressing enter or delete. This is bad for many // reasons (there are race conditions with commits) but some applications are // relying on this behavior so we continue to support it for older apps. - sendDownUpKeyEventForBackwardCompatibility(KeyEvent.KEYCODE_ENTER); + sendDownUpKeyEvent(KeyEvent.KEYCODE_ENTER); } else { final String text = new String(new int[] { code }, 0, 1); mConnection.commitText(text, text.length()); @@ -2104,12 +2104,16 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } final int lengthToDelete = Character.isSupplementaryCodePoint( mConnection.getCodePointBeforeCursor()) ? 2 : 1; - if (mAppWorkAroundsUtils.isBeforeJellyBean()) { - // Backward compatibility mode. Before Jelly bean, the keyboard would simulate - // a hardware keyboard event on pressing enter or delete. This is bad for many - // reasons (there are race conditions with commits) but some applications are - // relying on this behavior so we continue to support it for older apps. - sendDownUpKeyEventForBackwardCompatibility(KeyEvent.KEYCODE_DEL); + if (mAppWorkAroundsUtils.isBeforeJellyBean() || + currentSettings.mInputAttributes.isTypeNull()) { + // There are two possible reasons to send a key event: either the field has + // type TYPE_NULL, in which case the keyboard should send events, or we are + // running in backward compatibility mode. Before Jelly bean, the keyboard + // would simulate a hardware keyboard event on pressing enter or delete. This + // is bad for many reasons (there are race conditions with commits) but some + // applications are relying on this behavior so we continue to support it for + // older apps, so we retain this behavior if the app has target SDK < JellyBean. + sendDownUpKeyEvent(KeyEvent.KEYCODE_DEL); } else { mConnection.deleteSurroundingText(lengthToDelete, 0); } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 3b1d2427b..6cc0bfb76 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -225,6 +225,26 @@ public class BinaryDictEncoderUtils { return position; } + static void writeUIntToStream(final OutputStream stream, final int value, final int size) + throws IOException { + switch(size) { + case 4: + stream.write((value >> 24) & 0xFF); + /* fall through */ + case 3: + stream.write((value >> 16) & 0xFF); + /* fall through */ + case 2: + stream.write((value >> 8) & 0xFF); + /* fall through */ + case 1: + stream.write(value & 0xFF); + break; + default: + /* nop */ + } + } + // End utility methods // This method is responsible for finding a nice ordering of the nodes that favors run-time diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index aa5129ccb..849bff050 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -325,6 +325,12 @@ public final class FormatSpec { public final int mHeaderSize; public final DictionaryOptions mDictionaryOptions; public final FormatOptions mFormatOptions; + // Note that these are corresponding definitions in native code in latinime::HeaderPolicy + // and latinime::HeaderReadWriteUtils. + public static final String SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE = "SUPPORTS_DYNAMIC_UPDATE"; + public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE"; + public static final String ATTRIBUTE_VALUE_TRUE = "1"; + private static final String DICTIONARY_VERSION_ATTRIBUTE = "version"; private static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale"; private static final String DICTIONARY_ID_ATTRIBUTE = "dictionary"; diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java new file mode 100644 index 000000000..0b9cf91d2 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collections; + +/** + * SparseTable is an extensible map from integer to integer. + * This holds one value for every mBlockSize keys, so it uses 1/mBlockSize'th of the full index + * memory. + */ +@UsedForTesting +public class SparseTable { + + /** + * mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize + * terminals. + * It contains at index i = j / mBlockSize the index in mContentsTable where the values for + * terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized integer array. + */ + private final ArrayList<Integer> mLookupTable; + private final ArrayList<Integer> mContentTable; + + private final int mBlockSize; + public static final int NOT_EXIST = -1; + + @UsedForTesting + public SparseTable(final int initialCapacity, final int blockSize) { + mBlockSize = blockSize; + final int lookupTableSize = initialCapacity / mBlockSize + + (initialCapacity % mBlockSize > 0 ? 1 : 0); + mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST)); + mContentTable = new ArrayList<Integer>(); + } + + @UsedForTesting + public SparseTable(final int[] lookupTable, final int[] contentTable, final int blockSize) { + mBlockSize = blockSize; + mLookupTable = new ArrayList<Integer>(lookupTable.length); + for (int i = 0; i < lookupTable.length; ++i) { + mLookupTable.add(lookupTable[i]); + } + mContentTable = new ArrayList<Integer>(contentTable.length); + for (int i = 0; i < contentTable.length; ++i) { + mContentTable.add(contentTable[i]); + } + } + + /** + * Converts an byte array to an int array considering each set of 4 bytes is an int stored in + * big-endian. + * The length of byteArray must be a multiple of four. + * Otherwise, IndexOutOfBoundsException will be raised. + */ + @UsedForTesting + private static void convertByteArrayToIntegerArray(final byte[] byteArray, + final ArrayList<Integer> integerArray) { + for (int i = 0; i < byteArray.length; i += 4) { + int value = 0; + for (int j = i; j < i + 4; ++j) { + value <<= 8; + value |= byteArray[j] & 0xFF; + } + integerArray.add(value); + } + } + + @UsedForTesting + public SparseTable(final byte[] lookupTable, final byte[] contentTable, final int blockSize) { + mBlockSize = blockSize; + mLookupTable = new ArrayList<Integer>(lookupTable.length / 4); + mContentTable = new ArrayList<Integer>(contentTable.length / 4); + convertByteArrayToIntegerArray(lookupTable, mLookupTable); + convertByteArrayToIntegerArray(contentTable, mContentTable); + } + + @UsedForTesting + public int get(final int index) { + if (index < 0 || index / mBlockSize >= mLookupTable.size() + || mLookupTable.get(index / mBlockSize) == NOT_EXIST) { + return NOT_EXIST; + } + return mContentTable.get(mLookupTable.get(index / mBlockSize) + (index % mBlockSize)); + } + + @UsedForTesting + public void set(final int index, final int value) { + if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) { + mLookupTable.set(index / mBlockSize, mContentTable.size()); + for (int i = 0; i < mBlockSize; ++i) { + mContentTable.add(NOT_EXIST); + } + } + mContentTable.set(mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value); + } + + public void remove(final int index) { + set(index, NOT_EXIST); + } + + @UsedForTesting + public int size() { + return mLookupTable.size() * mBlockSize; + } + + @UsedForTesting + /* package */ int getContentTableSize() { + return mContentTable.size(); + } + + @UsedForTesting + /* package */ int getLookupTableSize() { + return mLookupTable.size(); + } + + public boolean contains(final int index) { + return get(index) != NOT_EXIST; + } + + @UsedForTesting + public void write(final OutputStream lookupOutStream, final OutputStream contentOutStream) + throws IOException { + for (final int index : mLookupTable) { + BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, 4); + } + + for (final int index : mContentTable) { + BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4); + } + } +} diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index 075d7e3c3..66517a800 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -34,12 +34,15 @@ import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListe import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; /** - * This class is a base class of a dictionary for the personalized prediction language model. + * This class is a base class of a dictionary that supports decaying for the personalized language + * model. */ -public abstract class DynamicPredictionDictionaryBase extends ExpandableBinaryDictionary { - private static final String TAG = DynamicPredictionDictionaryBase.class.getSimpleName(); +public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary { + private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName(); public static final boolean DBG_SAVE_RESTORE = false; private static final boolean DBG_STRESS_TEST = false; private static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; @@ -60,8 +63,9 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableBinaryDi // Should always be false except when we use this class for test @UsedForTesting boolean mIsTest = false; - /* package */ DynamicPredictionDictionaryBase(final Context context, final String locale, - final SharedPreferences sp, final String dictionaryType, final String fileName) { + /* package */ DecayingExpandableBinaryDictionaryBase(final Context context, + final String locale, final SharedPreferences sp, final String dictionaryType, + final String fileName) { super(context, fileName, dictionaryType, true); mLocale = locale; mFileName = fileName; @@ -84,6 +88,16 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableBinaryDi } @Override + protected Map<String, String> getHeaderAttributeMap() { + HashMap<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + return attributeMap; + } + + @Override protected boolean hasContentChanged() { return false; } diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java index 0af028a9e..305088536 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java @@ -79,7 +79,7 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, final boolean isNotAWord) { if (mBigramList.size() > mMaxHistoryBigrams * 2) { - // Too many entries: just stop adding new vocabrary and wait next refresh. + // Too many entries: just stop adding new vocabulary and wait next refresh. return; } mExpandableDictionary.addWord(word, shortcutTarget, frequency); @@ -90,7 +90,7 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr public void addBigramWords(final String word0, final String word1, final int frequency, final boolean isValid, final long lastModifiedTime) { if (mBigramList.size() > mMaxHistoryBigrams * 2) { - // Too many entries: just stop adding new vocabrary and wait next refresh. + // Too many entries: just stop adding new vocabulary and wait next refresh. return; } if (lastModifiedTime > 0) { @@ -176,8 +176,8 @@ public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWr } @UsedForTesting - public boolean isInDictionaryForTests(final String word) { + public boolean isInBigramListForTests(final String word) { // TODO: Use native method to determine whether the word is in dictionary or not - return mBigramList.containsKey(word); + return mBigramList.containsKey(word) || mBigramList.getBigrams(null).containsKey(word); } } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java index ab3de801c..c616a296c 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java @@ -46,7 +46,7 @@ public abstract class PersonalizationDictionaryUpdateSession { // TODO: Use a dynamic binary dictionary instead public WeakReference<PersonalizationDictionary> mDictionary; - public WeakReference<DynamicPredictionDictionaryBase> mPredictionDictionary; + public WeakReference<DecayingExpandableBinaryDictionaryBase> mPredictionDictionary; public final String mSystemLocale; public PersonalizationDictionaryUpdateSession(String locale) { mSystemLocale = locale; @@ -60,15 +60,16 @@ public abstract class PersonalizationDictionaryUpdateSession { mDictionary = new WeakReference<PersonalizationDictionary>(dictionary); } - public void setPredictionDictionary(DynamicPredictionDictionaryBase dictionary) { - mPredictionDictionary = new WeakReference<DynamicPredictionDictionaryBase>(dictionary); + public void setPredictionDictionary(DecayingExpandableBinaryDictionaryBase dictionary) { + mPredictionDictionary = + new WeakReference<DecayingExpandableBinaryDictionaryBase>(dictionary); } protected PersonalizationDictionary getDictionary() { return mDictionary == null ? null : mDictionary.get(); } - protected DynamicPredictionDictionaryBase getPredictionDictionary() { + protected DecayingExpandableBinaryDictionaryBase getPredictionDictionary() { return mPredictionDictionary == null ? null : mPredictionDictionary.get(); } @@ -81,7 +82,7 @@ public abstract class PersonalizationDictionaryUpdateSession { } private void unsetPredictionDictionary() { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } @@ -89,7 +90,7 @@ public abstract class PersonalizationDictionaryUpdateSession { } public void clearAndFlushPredictionDictionary(Context context) { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } @@ -105,7 +106,7 @@ public abstract class PersonalizationDictionaryUpdateSession { // TODO: Support multi locale to add bigram public void addBigramToPersonalizationDictionary(String word0, String word1, boolean isValid, int frequency) { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } @@ -116,7 +117,7 @@ public abstract class PersonalizationDictionaryUpdateSession { // TODO: Support multi locale to add bigram public void addBigramsToPersonalizationDictionary( final ArrayList<PersonalizationLanguageModelParam> lmParams) { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java index e80953c05..432954453 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java @@ -22,7 +22,7 @@ import com.android.inputmethod.latin.ExpandableBinaryDictionary; import android.content.Context; import android.content.SharedPreferences; -public class PersonalizationPredictionDictionary extends DynamicPredictionDictionaryBase { +public class PersonalizationPredictionDictionary extends DecayingExpandableBinaryDictionaryBase { private static final String NAME = PersonalizationPredictionDictionary.class.getSimpleName(); /* package */ PersonalizationPredictionDictionary(final Context context, final String locale, diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java index 4c1803bdf..55a90ee51 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java @@ -54,7 +54,7 @@ public final class UserHistoryDictionaryBigramList { * Called when loaded from the SQL DB. */ public void addBigram(String word1, String word2, byte fcValue) { - if (DynamicPredictionDictionaryBase.DBG_SAVE_RESTORE) { + if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue); } final HashMap<String, Byte> map; @@ -74,7 +74,7 @@ public final class UserHistoryDictionaryBigramList { * Called when inserted to the SQL DB. */ public void updateBigram(String word1, String word2, byte fcValue) { - if (DynamicPredictionDictionaryBase.DBG_SAVE_RESTORE) { + if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue); } final HashMap<String, Byte> map; diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java index b140c919b..38e308a4e 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java @@ -26,7 +26,7 @@ import android.content.SharedPreferences; * Locally gathers stats about the words user types and various other signals like auto-correction * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. */ -public class UserHistoryPredictionDictionary extends DynamicPredictionDictionaryBase { +public class UserHistoryPredictionDictionary extends DecayingExpandableBinaryDictionaryBase { /* package for tests */ static final String NAME = UserHistoryPredictionDictionary.class.getSimpleName(); /* package */ UserHistoryPredictionDictionary(final Context context, final String locale, diff --git a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java index 5dc0b5893..201a70d42 100644 --- a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java +++ b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java @@ -16,8 +16,11 @@ package com.android.inputmethod.latin.utils; -import java.util.ArrayDeque; import java.util.Queue; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; /** * An object that executes submitted tasks using a thread. @@ -27,19 +30,20 @@ public class PrioritizedSerialExecutor { private final Object mLock = new Object(); - // The default value of capacities of task queues. - private static final int TASK_QUEUE_CAPACITY = 1000; private final Queue<Runnable> mTasks; private final Queue<Runnable> mPrioritizedTasks; private boolean mIsShutdown; + private final ThreadPoolExecutor mThreadPoolExecutor; // The task which is running now. private Runnable mActive; public PrioritizedSerialExecutor() { - mTasks = new ArrayDeque<Runnable>(TASK_QUEUE_CAPACITY); - mPrioritizedTasks = new ArrayDeque<Runnable>(TASK_QUEUE_CAPACITY); + mTasks = new ConcurrentLinkedQueue<Runnable>(); + mPrioritizedTasks = new ConcurrentLinkedQueue<Runnable>(); mIsShutdown = false; + mThreadPoolExecutor = new ThreadPoolExecutor(1 /* corePoolSize */, 1 /* maximumPoolSize */, + 0 /* keepAliveTime */, TimeUnit.MILLISECONDS, new ArrayBlockingQueue<Runnable>(1)); } /** @@ -59,7 +63,16 @@ public class PrioritizedSerialExecutor { public void execute(final Runnable r) { synchronized(mLock) { if (!mIsShutdown) { - mTasks.offer(r); + mTasks.offer(new Runnable() { + @Override + public void run() { + try { + r.run(); + } finally { + scheduleNext(); + } + } + }); if (mActive == null) { scheduleNext(); } @@ -74,45 +87,36 @@ public class PrioritizedSerialExecutor { public void executePrioritized(final Runnable r) { synchronized(mLock) { if (!mIsShutdown) { - mPrioritizedTasks.offer(r); - if (mActive == null) { + mPrioritizedTasks.offer(new Runnable() { + @Override + public void run() { + try { + r.run(); + } finally { + scheduleNext(); + } + } + }); + if (mActive == null) { scheduleNext(); } } } } - private boolean fetchNextTasks() { - synchronized(mLock) { - mActive = mPrioritizedTasks.poll(); - if (mActive == null) { - mActive = mTasks.poll(); - } - return mActive != null; + private boolean fetchNextTasksLocked() { + mActive = mPrioritizedTasks.poll(); + if (mActive == null) { + mActive = mTasks.poll(); } + return mActive != null; } private void scheduleNext() { synchronized(mLock) { - if (!fetchNextTasks()) { - return; + if (fetchNextTasksLocked()) { + mThreadPoolExecutor.execute(mActive); } - new Thread(new Runnable() { - @Override - public void run() { - try { - do { - synchronized(mLock) { - if (mActive != null) { - mActive.run(); - } - } - } while (fetchNextTasks()); - } finally { - scheduleNext(); - } - } - }).start(); } } diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 7f47493b2..85e100e33 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -26,12 +26,55 @@ #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" namespace latinime { class ProximityInfo; +// TODO: Move to makedict. +static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz, + jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray, + jobjectArray attributeValueStringArray) { + const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); + char filePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); + filePathChars[filePathUtf8Length] = '\0'; + + const int keyCount = env->GetArrayLength(attributeKeyStringArray); + const int valueCount = env->GetArrayLength(attributeValueStringArray); + if (keyCount != valueCount) { + return false; + } + + HeaderReadWriteUtils::AttributeMap attributeMap; + for (int i = 0; i < keyCount; i++) { + jstring keyString = static_cast<jstring>( + env->GetObjectArrayElement(attributeKeyStringArray, i)); + const jsize keyUtf8Length = env->GetStringUTFLength(keyString); + char keyChars[keyUtf8Length + 1]; + env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); + keyChars[keyUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::key_type key; + HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); + + jstring valueString = static_cast<jstring>( + env->GetObjectArrayElement(attributeValueStringArray, i)); + const jsize valueUtf8Length = env->GetStringUTFLength(valueString); + char valueChars[valueUtf8Length + 1]; + env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); + valueChars[valueUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::mapped_type value; + HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); + + attributeMap[key] = value; + } + + return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion), + &attributeMap); +} + static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { PROF_OPEN; @@ -280,8 +323,31 @@ static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jcl bigramProbability); } +static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict, + jstring query) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) { + return env->NewStringUTF(""); + } + const jsize queryUtf8Length = env->GetStringUTFLength(query); + char queryChars[queryUtf8Length + 1]; + env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars); + queryChars[queryUtf8Length] = '\0'; + static const int GET_PROPERTY_RESULT_LENGTH = 100; + char resultChars[GET_PROPERTY_RESULT_LENGTH]; + resultChars[0] = '\0'; + dictionary->getDictionaryStructurePolicy()->getProperty(queryChars, resultChars, + GET_PROPERTY_RESULT_LENGTH); + return env->NewStringUTF(resultChars); +} + static const JNINativeMethod sMethods[] = { { + const_cast<char *>("createEmptyDictFileNative"), + const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile) + }, + { const_cast<char *>("openNative"), const_cast<char *>("(Ljava/lang/String;JJZ)J"), reinterpret_cast<void *>(latinime_BinaryDictionary_open) @@ -350,6 +416,11 @@ static const JNINativeMethod sMethods[] = { const_cast<char *>("calculateProbabilityNative"), const_cast<char *>("(JII)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative) + }, + { + const_cast<char *>("getPropertyNative"), + const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty) } }; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index ec1b63a12..214df1bbf 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -125,6 +125,11 @@ bool Dictionary::needsToRunGC() { return mDictionaryStructureWithBufferPolicy->needsToRunGC(); } +void Dictionary::getProperty(const char *const query, char *const outResult, + const int maxResultLength) const { + return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); +} + void Dictionary::logDictionaryInfo(JNIEnv *const env) const { const int BUFFER_SIZE = 16; int dictionaryIdCodePointBuffer[BUFFER_SIZE]; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 974447468..800751745 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -83,6 +83,9 @@ class Dictionary { bool needsToRunGC(); + void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const; + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy; } diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b95488ebd..2434287b1 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -80,6 +80,9 @@ class DictionaryStructureWithBufferPolicy { virtual bool needsToRunGC() const = 0; + virtual void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 29307b56a..e02f4cbf1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -119,7 +119,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b // Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode // has been deleted or is not a valid terminal. bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( - int *const bigramListPos) { + int *const bigramListPos, int *const outValidBigramEntryCount) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); if (usesAdditionalBuffer) { *bigramListPos -= mBuffer->getOriginalBufferSize(); @@ -161,6 +161,8 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) { return false; } + } else { + (*outValidBigramEntryCount) += 1; } } while(BigramListReadWriteUtils::hasNext(bigramFlags)); return true; @@ -169,7 +171,7 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( // Updates bigram target PtNode positions in the list after the placing step in GC. bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos, const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const - ptNodePositionRelocationMap) { + ptNodePositionRelocationMap, int *const outBigramEntryCount) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); if (usesAdditionalBuffer) { *bigramListPos -= mBuffer->getOriginalBufferSize(); @@ -211,11 +213,12 @@ bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bi return false; } } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + (*outBigramEntryCount) = bigramEntryCount; return true; } bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos, - const int probability, int *const bigramListPos) { + const int probability, int *const bigramListPos, bool *const outAddedNewBigram) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); if (usesAdditionalBuffer) { *bigramListPos -= mBuffer->getOriginalBufferSize(); @@ -243,6 +246,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg } if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) { // Update this bigram entry. + *outAddedNewBigram = false; const BigramListReadWriteUtils::BigramFlags updatedFlags = BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability); return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, @@ -254,12 +258,14 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg // The current last entry is found. // First, update the flags of the last entry. if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) { + *outAddedNewBigram = false; return false; } if (usesAdditionalBuffer) { *bigramListPos += mBuffer->getOriginalBufferSize(); } // Then, add a new entry after the last entry. + *outAddedNewBigram = true; return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos); } while(BigramListReadWriteUtils::hasNext(bigramFlags)); // We return directly from the while loop. diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 8ea318a41..3ebf69946 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -50,19 +50,20 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos, int *const toPos, int *const outBigramsCount) const; - bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos); + bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos, + int *const outBigramEntryCount); bool updateAllBigramTargetPtNodePositions(int *const bigramListPos, const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const - ptNodePositionRelocationMap); + ptNodePositionRelocationMap, int *const outValidBigramEntryCount); bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, - int *const bigramListPos); + int *const bigramListPos, bool *const outAddedNewBigram); bool writeNewBigramEntry(const int bigramTargetPos, const int probability, int *const writingPos); - // Return if targetBigramPos is found or not. + // Return whether or not targetBigramPos is found. bool removeBigram(const int bigramListPos, const int bigramTargetPos); private: diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index c60e45819..5eb473300 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -42,6 +42,9 @@ bool DynamicPatriciaTrieGcEventListeners } } else { valueStack.back() += 1; + if (node->isTerminal()) { + mValidUnigramCount += 1; + } } return true; } @@ -137,10 +140,15 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField // Updates bigram target PtNode positions in the bigram list. int bigramsPos = node->getBigramsPos(); if (bigramsPos != NOT_A_DICT_POS) { + int bigramEntryCount; if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, - &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) { + &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) { return false; } + mBigramCount += bigramEntryCount; + } + if (node->isTerminal()) { + mUnigramCount++; } return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index 4256f22fb..aa6e60959 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -41,7 +41,7 @@ class DynamicPatriciaTrieGcEventListeners { DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const buffer) : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), - mChildrenValue(0) {} + mChildrenValue(0), mValidUnigramCount(0) {} ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; @@ -64,6 +64,10 @@ class DynamicPatriciaTrieGcEventListeners { bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints); + int getValidUnigramCount() const { + return mValidUnigramCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); @@ -72,6 +76,7 @@ class DynamicPatriciaTrieGcEventListeners { BufferWithExtendableBuffer *const mBuffer; std::vector<int> valueStack; int mChildrenValue; + int mValidUnigramCount; }; // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram @@ -80,7 +85,7 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy) - : mBigramPolicy(bigramPolicy) {} + : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {} bool onAscend() { return true; } @@ -93,18 +98,26 @@ class DynamicPatriciaTrieGcEventListeners { if (!node->isDeleted()) { int pos = node->getBigramsPos(); if (pos != NOT_A_DICT_POS) { - if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) { + int bigramEntryCount = 0; + if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, + &bigramEntryCount)) { return false; } + mValidBigramEntryCount += bigramEntryCount; } } return true; } + int getValidBigramEntryCount() const { + return mValidBigramEntryCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability); DynamicBigramListPolicy *const mBigramPolicy; + int mValidBigramEntryCount; }; class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer @@ -150,7 +163,8 @@ class DynamicPatriciaTrieGcEventListeners { dictPositionRelocationMap) : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), mBufferToWrite(bufferToWrite), - mDictPositionRelocationMap(dictPositionRelocationMap) {}; + mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0), + mBigramCount(0) {}; bool onAscend() { return true; } @@ -161,6 +175,14 @@ class DynamicPatriciaTrieGcEventListeners { bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints); + int getUnigramCount() const { + return mUnigramCount; + } + + int getBigramCount() const { + return mBigramCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); @@ -169,6 +191,8 @@ class DynamicPatriciaTrieGcEventListeners { BufferWithExtendableBuffer *const mBufferToWrite; const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const mDictPositionRelocationMap; + int mUnigramCount; + int mBigramCount; }; private: diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 456352c17..2fa3111d3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -26,7 +26,8 @@ namespace latinime { void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { - AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d", + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", ptNodePos, mBuffer->getTailPosition()); ASSERT(false); invalidatePtNodeInfo(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 42397c19e..4581ec093 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -16,6 +16,9 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include <cstdio> +#include <cstring> + #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" @@ -28,6 +31,9 @@ namespace latinime { +const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; + void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { @@ -198,7 +204,16 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy); - return writingHelper.addUnigramWord(&readingHelper, word, length, probability); + bool addedNewUnigram = false; + if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, + &addedNewUnigram)) { + if (addedNewUnigram) { + mUnigramCount++; + } + return true; + } else { + return false; + } } bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, @@ -219,7 +234,15 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy); - return writingHelper.addBigramWords(word0Pos, word1Pos, probability); + bool addedNewBigram = false; + if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } } bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, @@ -240,7 +263,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy); - return writingHelper.removeBigramWords(word0Pos, word1Pos); + if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } } void DynamicPatriciaTriePolicy::flush(const char *const filePath) { @@ -250,7 +278,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy); - writingHelper.writeToDictFile(filePath, &mHeaderPolicy); + writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { @@ -272,4 +300,13 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const { return mBufferWithExtendableBuffer.isNearSizeLimit(); } +void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, + const int maxResultLength) const { + if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mUnigramCount); + } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mBigramCount); + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 06d8095d8..7f9d4d924 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -37,7 +37,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mShortcutListPolicy(&mBufferWithExtendableBuffer), - mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {} + mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy), + mUnigramCount(mHeaderPolicy.getUnigramCount()), + mBigramCount(mHeaderPolicy.getBigramCount()) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -91,14 +93,22 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool needsToRunGC() const; + void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); + static const char*const UNIGRAM_COUNT_QUERY; + static const char*const BIGRAM_COUNT_QUERY; + const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mBufferWithExtendableBuffer; DynamicShortcutListPolicy mShortcutListPolicy; DynamicBigramListPolicy mBigramListPolicy; + int mUnigramCount; + int mBigramCount; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index f4a2ef389..601ee663b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -155,6 +155,15 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { + if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of a bug or a broken dictionary. + AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", + mReadingState.mPos, mBuffer->getTailPosition()); + ASSERT(false); + mIsError = true; + mReadingState.mPos = NOT_A_DICT_POS; + return; + } mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); @@ -191,6 +200,15 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { // Follow the forward link and read the next node array if exists. void DynamicPatriciaTrieReadingHelper::followForwardLink() { + if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", + mReadingState.mPos, mBuffer->getTailPosition()); + ASSERT(false); + mIsError = true; + mReadingState.mPos = NOT_A_DICT_POS; + return; + } const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index c6d8ddcf7..512a4d818 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -240,6 +240,7 @@ class DynamicPatriciaTrieReadingHelper { static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; + // TODO: Introduce error code to track what caused the error. bool mIsError; ReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 578645cd5..bae5e8cad 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -36,7 +36,8 @@ const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability) { + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); @@ -54,6 +55,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( const int nextIndex = matchedCodePointCount + j; if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, wordCodePoints[matchedCodePointCount + j])) { + *outAddedNewUnigram = true; return reallocatePtNodeAndAddNewPtNodes(nodeReader, readingHelper->getMergedNodeCodePoints(), j, probability, wordCodePoints + matchedCodePointCount, @@ -63,9 +65,10 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount()) { return setPtNodeProbability(nodeReader, probability, - readingHelper->getMergedNodeCodePoints()); + readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram); } if (!nodeReader->hasChildren()) { + *outAddedNewUnigram = true; return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, probability, wordCodePoints + readingHelper->getTotalCodePointCount(), codePointCount - readingHelper->getTotalCodePointCount()); @@ -79,6 +82,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( return false; } int pos = readingHelper->getPosOfLastForwardLinkField(); + *outAddedNewUnigram = true; return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), @@ -86,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( } bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, - const int probability) { + const int probability, bool *const outAddedNewBigram) { int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, @@ -107,9 +111,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { // Insert a new bigram entry into the existing bigram list. int bigramListPos = nodeReader.getBigramsPos(); - return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos); + return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, + outAddedNewBigram); } else { // The PtNode doesn't have a bigram list. + *outAddedNewBigram = true; // First, Write a bigram entry at the tail position of the PtNode. if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { return false; @@ -138,9 +144,12 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con } void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, - const HeaderPolicy *const headerPolicy) { + const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); - if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { + const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + + mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, + unigramCount, bigramCount, extendedRegionSize)) { return; } DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); @@ -148,13 +157,16 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); - if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) { - return; - } BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, MAX_DICTIONARY_SIZE); - if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { + int unigramCount = 0; + int bigramCount = 0; + if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) { + return; + } + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, + unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; } DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); @@ -335,9 +347,10 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, - const int *const codePoints) { + const int *const codePoints, bool *const outAddedNewUnigram) { if (originalPtNode->isTerminal()) { // Overwrites the probability. + *outAddedNewUnigram = false; int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, probability, &probabilityFieldPos)) { @@ -345,6 +358,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } } else { // Make the node terminal and write the probability. + *outAddedNewUnigram = true; int movedPos = mBuffer->getTailPosition(); if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { return false; @@ -460,7 +474,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( } bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, - BufferWithExtendableBuffer *const bufferToWrite) { + BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, + int *const outBigramCount) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners @@ -505,6 +520,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, &traversePolicyToUpdateAllPositionFields)) { return false; } + *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); + *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount(); return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index fe1b2437a..827b6097f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -56,15 +56,18 @@ class DynamicPatriciaTrieWritingHelper { // Add a word to the dictionary. If the word already exists, update the probability. bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability); + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram); // Add a bigram relation from word0Pos to word1Pos. - bool addBigramWords(const int word0Pos, const int word1Pos, const int probability); + bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, + bool *const outAddedNewBigram); // Remove a bigram relation from word0Pos to word1Pos. bool removeBigramWords(const int word0Pos, const int word1Pos); - void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy); + void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, + const int unigramCount, const int bigramCount); void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy); @@ -107,7 +110,7 @@ class DynamicPatriciaTrieWritingHelper { const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, - const int probability, const int *const codePoints); + const int probability, const int *const codePoints, bool *const outAddedNewUnigram); bool createChildrenPtNodeArrayAndAChildPtNode( const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, @@ -122,7 +125,8 @@ class DynamicPatriciaTrieWritingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite); + bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite, + int *const outUnigramCount, int *const outBigramCount); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 9a32f64d4..78c6c042f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -16,15 +16,15 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include <cstddef> -#include <cstdio> -#include <ctime> - namespace latinime { +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; +const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; +const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; +const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; @@ -53,33 +53,17 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - std::vector<int> keyVector; - HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector); const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); + MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); if (demotionRate <= 0) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); } -bool HeaderPolicy::readUsesForgettingCurveFlag() const { - std::vector<int> keyVector; - HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector); - return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, - false /* defaultValue */); -} - -// Returns current time when the key is not found or the value is invalid. -int HeaderPolicy::readLastUpdatedTime() const { - std::vector<int> keyVector; - HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector); - return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, - time(0) /* defaultValue */); -} - bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime) const { + const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount, + const int extendedRegionSize) const { int writingPos = 0; if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion, &writingPos)) { @@ -95,21 +79,19 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT &writingPos)) { return false; } + HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, UNIGRAM_COUNT_KEY, unigramCount); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, BIGRAM_COUNT_KEY, bigramCount); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, EXTENDED_REGION_SIZE_KEY, + extendedRegionSize); if (updatesLastUpdatedTime) { // Set current time as a last updated time. - HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); - std::vector<int> updatedTimekey; - HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); - HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0)); - if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, - &writingPos)) { - return false; - } - } else { - if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap, - &writingPos)) { - return false; - } + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY, + time(0)); + } + if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, + &writingPos)) { + return false; } // Writes an actual header size. if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index e97c08ca4..93b9c6fcb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,6 +17,7 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H +#include <ctime> #include <stdint.h> #include "defines.h" @@ -35,8 +36,16 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), - mUsesForgettingCurve(readUsesForgettingCurveFlag()), - mLastUpdatedTime(readLastUpdatedTime()) {} + mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, + USES_FORGETTING_CURVE_KEY, false /* defaultValue */)), + mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), + mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + BIGRAM_COUNT_KEY, 0 /* defaultValue */)), + mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {} // Constructs header information using an attribute map. HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, @@ -44,9 +53,12 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { : mDictFormatVersion(dictFormatVersion), mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( attributeMap)), mSize(0), mAttributeMap(*attributeMap), - mMultiWordCostMultiplier(readUsesForgettingCurveFlag()), - mUsesForgettingCurve(readUsesForgettingCurveFlag()), - mLastUpdatedTime(readLastUpdatedTime()) {} + mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), + mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, + USES_FORGETTING_CURVE_KEY, false /* defaultValue */)), + mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} ~HeaderPolicy() {} @@ -78,11 +90,24 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mLastUpdatedTime; } + AK_FORCE_INLINE int getUnigramCount() const { + return mUnigramCount; + } + + AK_FORCE_INLINE int getBigramCount() const { + return mBigramCount; + } + + AK_FORCE_INLINE int getExtendedRegionSize() const { + return mExtendedRegionSize; + } + void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime) const; + const bool updatesLastUpdatedTime, const int unigramCount, + const int bigramCount, const int extendedRegionSize) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); @@ -90,6 +115,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const USES_FORGETTING_CURVE_KEY; static const char *const LAST_UPDATED_TIME_KEY; + static const char *const UNIGRAM_COUNT_KEY; + static const char *const BIGRAM_COUNT_KEY; + static const char *const EXTENDED_REGION_SIZE_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; @@ -100,13 +128,12 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const float mMultiWordCostMultiplier; const bool mUsesForgettingCurve; const int mLastUpdatedTime; + const int mUnigramCount; + const int mBigramCount; + const int mExtendedRegionSize; float readMultipleWordCostMultiplier() const; - bool readUsesForgettingCurveFlag() const; - - int readLastUpdatedTime() const; - static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index e6547675c..2694ce8d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -45,6 +45,7 @@ const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = "REQUIRES_GERMAN_UMLAUT_PROCESSING"; @@ -67,18 +68,12 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY /* static */ HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - AttributeMap::key_type key; - insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key); - const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key, - false /* defaultValue */); - key.clear(); - insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key); - const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key, - false /* defaultValue */); - key.clear(); - insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key); - const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key, - false /* defaultValue */); + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, + REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false /* defaultValue */); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, + REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, false /* defaultValue */); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, + SUPPORTS_DYNAMIC_UPDATE_KEY, false /* defaultValue */); DictionaryFlags dictflags = NO_FLAGS; dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; @@ -159,11 +154,18 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY } /* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, - const AttributeMap::key_type *const key, const bool value) { + const char *const key, const bool value) { setIntAttribute(headerAttributes, key, value ? 1 : 0); } /* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, + const char *const key, const int value) { + AttributeMap::key_type keyVector; + insertCharactersIntoVector(key, &keyVector); + setIntAttributeInner(headerAttributes, &keyVector, value); +} + +/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, const int value) { AttributeMap::mapped_type valueVector; char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; @@ -173,7 +175,7 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY } /* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( - const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const AttributeMap *const headerAttributes, const char *const key, const bool defaultValue) { const int intDefaultValue = defaultValue ? 1 : 0; const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); @@ -181,6 +183,14 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY } /* static */ int HeaderReadWriteUtils::readIntAttributeValue( + const AttributeMap *const headerAttributes, const char *const key, + const int defaultValue) { + AttributeMap::key_type keyVector; + insertCharactersIntoVector(key, &keyVector); + return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue); +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValueInner( const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, const int defaultValue) { AttributeMap::const_iterator it = headerAttributes->find(*key); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index caa5097f6..225968323 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -76,16 +76,16 @@ class HeaderReadWriteUtils { * Methods for header attributes. */ static void setBoolAttribute(AttributeMap *const headerAttributes, - const AttributeMap::key_type *const key, const bool value); + const char *const key, const bool value); static void setIntAttribute(AttributeMap *const headerAttributes, - const AttributeMap::key_type *const key, const int value); + const char *const key, const int value); static bool readBoolAttributeValue(const AttributeMap *const headerAttributes, - const AttributeMap::key_type *const key, const bool defaultValue); + const char *const key, const bool defaultValue); static int readIntAttributeValue(const AttributeMap *const headerAttributes, - const AttributeMap::key_type *const key, const int defaultValue); + const char *const key, const int defaultValue); static void insertCharactersIntoVector(const char *const characters, AttributeMap::key_type *const key); @@ -112,6 +112,12 @@ class HeaderReadWriteUtils { static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; + + static void setIntAttributeInner(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value); + + static int readIntAttributeValueInner(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int defaultValue); }; } #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index f1de914cb..4277ff5d7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -113,6 +113,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } + void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const { + // getProperty is not supported for this class. + if (maxResultLength > 0) { + outResult[0] = '\0'; + } + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 2e4ec2e1d..4fae91936 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -43,7 +43,8 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = const HeaderReadWriteUtils::AttributeMap *const attributeMap) { BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); - headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */); + headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, + 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index b6aa85896..9f0a331e3 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -74,7 +74,8 @@ class TypingWeighting : public Weighting { // Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on // the keyboard (like accented letters) const float normalizedSquaredLength = traverseSession->getProximityInfoState(0) - ->getPointToKeyLength(pointIndex, dicNode->getNodeCodePoint()); + ->getPointToKeyLength(pointIndex, + CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float normalizedDistance = TouchPositionCorrectionUtils::getSweetSpotFactor( traverseSession->isTouchPositionCorrectionEnabled(), normalizedSquaredLength); const float weightedDistance = ScoringParams::DISTANCE_WEIGHT_LENGTH * normalizedDistance; @@ -113,10 +114,10 @@ class TypingWeighting : public Weighting { const int16_t parentPointIndex = parentDicNode->getInputIndex(0); const int prevCodePoint = parentDicNode->getNodeCodePoint(); const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex + 1, prevCodePoint); + parentPointIndex + 1, CharUtils::toBaseLowerCase(prevCodePoint)); const int codePoint = dicNode->getNodeCodePoint(); const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex, codePoint); + parentPointIndex, CharUtils::toBaseLowerCase(codePoint)); const float distance = distance1 + distance2; const float weightedLengthDistance = distance * ScoringParams::DISTANCE_WEIGHT_LENGTH; @@ -133,7 +134,7 @@ class TypingWeighting : public Weighting { const bool existsAdjacentProximityChars = traverseSession->getProximityInfoState(0) ->existsAdjacentProximityChars(insertedPointIndex); const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - insertedPointIndex + 1, dicNode->getNodeCodePoint()); + insertedPointIndex + 1, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH; const bool singleChar = dicNode->getNodeCodePointCount() == 1; float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f); diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 96a2217a3..826c0f7b2 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -21,24 +21,19 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; -import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; +import java.util.Map; import java.util.Random; @LargeTest public class BinaryDictionaryTests extends AndroidTestCase { - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(3 /* version */, true /* supportsDynamicUpdate */); private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; @@ -52,15 +47,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { super.tearDown(); } - private File createEmptyDictionaryAndGetFile(final String filename) throws IOException, - UnsupportedFormatException { - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); + private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - return file; + Map<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + 3 /* dictVersion */, attributeMap)) { + return file; + } else { + throw new IOException("Empty dictionary cannot be created."); + } } public void testIsValidDictionary() { @@ -69,8 +67,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -95,8 +91,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -139,8 +133,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -169,8 +161,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -240,8 +230,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -294,8 +282,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -342,8 +328,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -392,8 +376,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -445,8 +427,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), @@ -516,8 +496,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), @@ -617,8 +595,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } final ArrayList<String> words = new ArrayList<String>(); @@ -650,4 +626,57 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + public void testUnigramAndBigramCount() { + final int flashWithGCIterationCount = 10; + final int codePointSetSize = 50; + final int unigramCountPerIteration = 1000; + final int bigramCountPerIteration = 2000; + final int seed = 1123581321; + + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + + final ArrayList<String> words = new ArrayList<String>(); + final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + BinaryDictionary binaryDictionary; + for (int i = 0; i < flashWithGCIterationCount; i++) { + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + for (int j = 0; j < unigramCountPerIteration; j++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + for (int j = 0; j < bigramCountPerIteration; j++) { + final String word0 = words.get(random.nextInt(words.size())); + final String word1 = words.get(random.nextInt(words.size())); + bigrams.add(new Pair<String, String>(word0, word1)); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability); + } + assertEquals(new HashSet<String>(words).size(), Integer.parseInt( + binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( + binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); + binaryDictionary.flushWithGC(); + assertEquals(new HashSet<String>(words).size(), Integer.parseInt( + binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); + assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( + binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); + binaryDictionary.close(); + } + + dictFile.delete(); + } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java new file mode 100644 index 000000000..132483d5e --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.LargeTest; +import android.util.Log; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Random; + +/** + * Unit tests for SparseTable. + */ +@LargeTest +public class SparseTableTests extends AndroidTestCase { + private static final String TAG = SparseTableTests.class.getSimpleName(); + + private static final int[] SMALL_INDEX = { SparseTable.NOT_EXIST, 0 }; + private static final int[] BIG_INDEX = { SparseTable.NOT_EXIST, 1, 2, 3, 4, 5, 6, 7}; + + private final Random mRandom; + private final ArrayList<Integer> mRandomIndex; + + private static final int DEFAULT_SIZE = 10000; + private static final int BLOCK_SIZE = 8; + + public SparseTableTests() { + this(System.currentTimeMillis(), DEFAULT_SIZE); + } + + public SparseTableTests(final long seed, final int tableSize) { + super(); + Log.d(TAG, "Seed for test is " + seed + ", size is " + tableSize); + mRandom = new Random(seed); + mRandomIndex = new ArrayList<Integer>(tableSize); + for (int i = 0; i < tableSize; ++i) { + mRandomIndex.add(SparseTable.NOT_EXIST); + } + } + + public void testInitializeWithArray() { + final SparseTable table = new SparseTable(SMALL_INDEX, BIG_INDEX, BLOCK_SIZE); + for (int i = 0; i < 8; ++i) { + assertEquals(SparseTable.NOT_EXIST, table.get(i)); + } + assertEquals(SparseTable.NOT_EXIST, table.get(8)); + for (int i = 9; i < 16; ++i) { + assertEquals(i - 8, table.get(i)); + } + } + + public void testSet() { + final SparseTable table = new SparseTable(16, BLOCK_SIZE); + table.set(3, 6); + table.set(8, 16); + for (int i = 0; i < 16; ++i) { + if (i == 3 || i == 8) { + assertEquals(i * 2, table.get(i)); + } else { + assertEquals(SparseTable.NOT_EXIST, table.get(i)); + } + } + } + + private void generateRandomIndex(final int size, final int prop) { + for (int i = 0; i < DEFAULT_SIZE; ++i) { + if (mRandom.nextInt(100) < prop) { + mRandomIndex.set(i, mRandom.nextInt()); + } else { + mRandomIndex.set(i, SparseTable.NOT_EXIST); + } + } + } + + private void runTestRandomSet() { + final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE); + int elementCount = 0; + for (int i = 0; i < DEFAULT_SIZE; ++i) { + if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) { + table.set(i, mRandomIndex.get(i)); + elementCount++; + } + } + + Log.d(TAG, "table size = " + table.getLookupTableSize() + " + " + + table.getContentTableSize()); + Log.d(TAG, "the table has " + elementCount + " elements"); + for (int i = 0; i < DEFAULT_SIZE; ++i) { + assertEquals(table.get(i), (int)mRandomIndex.get(i)); + } + + // flush and reload + OutputStream lookupOutStream = null; + OutputStream contentOutStream = null; + InputStream lookupInStream = null; + InputStream contentInStream = null; + try { + final File lookupIndexFile = File.createTempFile("testRandomSet", ".small"); + final File contentFile = File.createTempFile("testRandomSet", ".big"); + lookupOutStream = new FileOutputStream(lookupIndexFile); + contentOutStream = new FileOutputStream(contentFile); + table.write(lookupOutStream, contentOutStream); + lookupInStream = new FileInputStream(lookupIndexFile); + contentInStream = new FileInputStream(contentFile); + final byte[] lookupArray = new byte[(int) lookupIndexFile.length()]; + final byte[] contentArray = new byte[(int) contentFile.length()]; + lookupInStream.read(lookupArray); + contentInStream.read(contentArray); + final SparseTable newTable = new SparseTable(lookupArray, contentArray, BLOCK_SIZE); + for (int i = 0; i < DEFAULT_SIZE; ++i) { + assertEquals(table.get(i), newTable.get(i)); + } + } catch (IOException e) { + Log.d(TAG, "IOException while flushing and realoding", e); + } finally { + if (lookupOutStream != null) { + try { + lookupOutStream.close(); + } catch (IOException e) { + Log.d(TAG, "IOException while closing the stream", e); + } + } + if (contentOutStream != null) { + try { + contentOutStream.close(); + } catch (IOException e) { + Log.d(TAG, "IOException while closing contentStream.", e); + } + } + } + } + + public void testRandomSet() { + for (int i = 0; i <= 100; i += 10) { + generateRandomIndex(DEFAULT_SIZE, i); + runTestRandomSet(); + } + } +} diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java index d605cdb84..06c427193 100644 --- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java @@ -84,29 +84,24 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { } /** - * @param checksContents if true, checks whether written words are actually in the dictionary + * @param checkContents if true, checks whether written words are actually in the dictionary * or not. */ private void addAndWriteRandomWords(final String testFilenameSuffix, final int numberOfWords, - final Random random, final boolean checksContents) { + final Random random, final boolean checkContents) { final List<String> words = generateWords(numberOfWords, random); final UserHistoryPredictionDictionary dict = PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(), testFilenameSuffix /* locale */, mPrefs); // Add random words to the user history dictionary. addToDict(dict, words); - if (checksContents) { + if (checkContents) { try { Thread.sleep(TimeUnit.MILLISECONDS.convert(5L, TimeUnit.SECONDS)); } catch (InterruptedException e) { } - // Limit word count to check when using a Java on memory dictionary. - final int wordCountToCheck = - ExpandableBinaryDictionary.ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ? - numberOfWords : 10; - for (int i = 0; i < wordCountToCheck; ++i) { + for (int i = 0; i < numberOfWords; ++i) { final String word = words.get(i); - // This may fail as long as we use tryLock on inserting the bigram words assertTrue(dict.isInDictionaryForTests(word)); } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index fa80385fc..6c4cbcf9d 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -185,7 +185,7 @@ public final class BinaryDictOffdeviceUtils { crash(filename, new RuntimeException( filename + " does not seem to be a dictionary file")); } else { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile, DictDecoder.USE_BYTEARRAY); if (report) { System.out.println("Format : Binary dictionary format"); |