diff options
13 files changed, 213 insertions, 629 deletions
diff --git a/java/src/com/android/inputmethod/keyboard/MainKeyboardView.java b/java/src/com/android/inputmethod/keyboard/MainKeyboardView.java index 810bd9150..3a2869950 100644 --- a/java/src/com/android/inputmethod/keyboard/MainKeyboardView.java +++ b/java/src/com/android/inputmethod/keyboard/MainKeyboardView.java @@ -705,7 +705,8 @@ public final class MainKeyboardView extends KeyboardView implements PointerTrack final AnimatorSet zoomOutAnimation = new AnimatorSet(); zoomOutAnimation.play(scaleXAnimation).with(scaleYAnimation); // TODO: Implement preference option to control key preview animation duration. - zoomOutAnimation.setDuration(mKeyPreviewZoomOutDuration); + final int zoomOutDuration = Math.min(mKeyPreviewZoomOutDuration, mKeyPreviewLingerTimeout); + zoomOutAnimation.setDuration(zoomOutDuration); zoomOutAnimation.setInterpolator(ACCELERATE_INTERPOLATOR); zoomOutAnimation.addListener(new AnimatorListenerAdapter() { @Override diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 00eb57c9f..bdf89450f 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -357,7 +357,7 @@ public final class BinaryDictionary extends Dictionary { while (len < MAX_WORD_LENGTH && codePoints[len] != 0) { ++len; } - final String word = new String(mOutputCodePoints, 0, len); + final String word = new String(codePoints, 0, len); return new GetNextWordPropertyResult(getWordProperty(word), nextToken); } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 4dee84a7b..226c3c822 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -28,6 +28,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor; +import com.android.inputmethod.latin.utils.WordProperty; import java.io.File; import java.util.ArrayList; @@ -778,16 +779,24 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } @UsedForTesting - protected void runAfterGcForDebug(final Runnable r) { - getExecutor(mDictName).executePrioritized(new Runnable() { + public void dumpAllWordsForDebug() { + reloadDictionaryIfRequired(); + getExecutor(mDictName).execute(new Runnable() { @Override public void run() { - try { - mBinaryDictionary.flushWithGC(); - r.run(); - } finally { - mDictNameDictionaryUpdateController.mProcessingLargeTask.set(false); - } + Log.d(TAG, "dictionary=" + mDictName); + int token = 0; + do { + final BinaryDictionary.GetNextWordPropertyResult result = + mBinaryDictionary.getNextWordProperty(token); + final WordProperty wordProperty = result.mWordProperty; + if (wordProperty == null) { + Log.d(TAG, " dictionary is empty."); + break; + } + Log.d(TAG, wordProperty.toString()); + token = result.mNextToken; + } while (token != 0); } }); } diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index d636a253a..cd6a3aa92 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -17,21 +17,15 @@ package com.android.inputmethod.latin.personalization; import android.content.Context; -import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary; -import com.android.inputmethod.latin.makedict.DictDecoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.LanguageModelParam; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; import java.io.File; -import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Locale; @@ -44,7 +38,6 @@ import java.util.concurrent.TimeUnit; */ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary { private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName(); - public static final boolean DBG_SAVE_RESTORE = false; private static final boolean DBG_DUMP_ON_CLOSE = false; /** Any pair being typed or picked */ @@ -53,8 +46,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED; public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY; - public static final int REQUIRED_BINARY_DICTIONARY_VERSION = FormatSpec.VERSION4; - /** The locale for this dictionary. */ public final Locale mLocale; @@ -161,57 +152,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB } @UsedForTesting - public void dumpAllWordsForDebug() { - runAfterGcForDebug(new Runnable() { - @Override - public void run() { - dumpAllWordsForDebugLocked(); - } - }); - } - - private void dumpAllWordsForDebugLocked() { - Log.d(TAG, "dumpAllWordsForDebug started."); - final OnAddWordListener listener = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency, final int shortcutFreq) { - Log.d(TAG, "load unigram: " + word + "," + frequency); - } - - @Override - public void setBigram(final String word0, final String word1, final int frequency) { - if (word0.length() < Constants.DICTIONARY_MAX_WORD_LENGTH - && word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) { - Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency); - } else { - Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + "," - + frequency); - } - } - }; - - // Load the dictionary from binary file - final File dictFile = new File(mContext.getFilesDir(), mDictName); - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(dictFile, - DictDecoder.USE_BYTEARRAY); - if (dictDecoder == null) { - // This is an expected condition: we don't have a user history dictionary for this - // language yet. It will be created sometime later. - return; - } - - try { - dictDecoder.openDictBuffer(); - UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener); - } catch (IOException e) { - Log.d(TAG, "IOException on opening a bytebuffer", e); - } catch (UnsupportedFormatException e) { - Log.d(TAG, "Unsupported format, can't read the dictionary", e); - } - } - - @UsedForTesting public void clearAndFlushDictionary() { // Clear the node structure on memory clear(); diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java deleted file mode 100644 index 55a90ee51..000000000 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.personalization; - -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.utils.CollectionUtils; - -import java.util.HashMap; -import java.util.Set; - -/** - * A store of bigrams which will be updated when the user history dictionary is closed - * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale - * bigrams when we write to the SQL DB. - */ -@UsedForTesting -public final class UserHistoryDictionaryBigramList { - public static final byte FORGETTING_CURVE_INITIAL_VALUE = 0; - private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName(); - private static final HashMap<String, Byte> EMPTY_BIGRAM_MAP = CollectionUtils.newHashMap(); - private final HashMap<String, HashMap<String, Byte>> mBigramMap = CollectionUtils.newHashMap(); - private int mSize = 0; - - public void evictAll() { - mSize = 0; - mBigramMap.clear(); - } - - /** - * Called when the user typed a word. - */ - @UsedForTesting - public void addBigram(String word1, String word2) { - addBigram(word1, word2, FORGETTING_CURVE_INITIAL_VALUE); - } - - /** - * Called when loaded from the SQL DB. - */ - public void addBigram(String word1, String word2, byte fcValue) { - if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { - Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue); - } - final HashMap<String, Byte> map; - if (mBigramMap.containsKey(word1)) { - map = mBigramMap.get(word1); - } else { - map = CollectionUtils.newHashMap(); - mBigramMap.put(word1, map); - } - if (!map.containsKey(word2)) { - ++mSize; - map.put(word2, fcValue); - } - } - - /** - * Called when inserted to the SQL DB. - */ - public void updateBigram(String word1, String word2, byte fcValue) { - if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { - Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue); - } - final HashMap<String, Byte> map; - if (mBigramMap.containsKey(word1)) { - map = mBigramMap.get(word1); - } else { - return; - } - if (!map.containsKey(word2)) { - return; - } - map.put(word2, fcValue); - } - - public int size() { - return mSize; - } - - public boolean isEmpty() { - return mBigramMap.isEmpty(); - } - - public boolean containsKey(String word) { - return mBigramMap.containsKey(word); - } - - public Set<String> keySet() { - return mBigramMap.keySet(); - } - - public HashMap<String, Byte> getBigrams(String word1) { - if (mBigramMap.containsKey(word1)) return mBigramMap.get(word1); - // TODO: lower case according to locale - final String lowerWord1 = word1.toLowerCase(); - if (mBigramMap.containsKey(lowerWord1)) return mBigramMap.get(lowerWord1); - return EMPTY_BIGRAM_MAP; - } - - public boolean removeBigram(String word1, String word2) { - final HashMap<String, Byte> set = getBigrams(word1); - if (set.isEmpty()) { - return false; - } - if (set.containsKey(word2)) { - set.remove(word2); - --mSize; - return true; - } - return false; - } -} diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java deleted file mode 100644 index 7af03da59..000000000 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; -import com.android.inputmethod.latin.makedict.DictDecoder; -import com.android.inputmethod.latin.makedict.DictEncoder; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.PendingAttribute; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.concurrent.TimeUnit; - -/** - * Reads and writes Binary files for a UserHistoryDictionary. - * - * All the methods in this class are static. - */ -public final class UserHistoryDictIOUtils { - private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName(); - private static final boolean DEBUG = false; - - public interface OnAddWordListener { - /** - * Callback to be notified when a word is added to the dictionary. - * @param word The added word. - * @param shortcutTarget A shortcut target for this word, or null if none. - * @param frequency The frequency for this word. - * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). - * Unspecified if shortcutTarget is null - do not rely on its value. - */ - public void setUnigram(final String word, final String shortcutTarget, final int frequency, - final int shortcutFreq); - public void setBigram(final String word1, final String word2, final int frequency); - } - - @UsedForTesting - public interface BigramDictionaryInterface { - public int getFrequency(final String word1, final String word2); - } - - /** - * Writes dictionary to file. - */ - @UsedForTesting - public static void writeDictionary(final DictEncoder dictEncoder, - final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, - final FormatOptions formatOptions, final HashMap<String, String> options) { - final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options); - fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY, - String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); - try { - dictEncoder.writeDictionary(fusionDict, formatOptions); - Log.d(TAG, "end writing"); - } catch (IOException e) { - Log.e(TAG, "IO exception while writing file", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); - } - } - - /** - * Constructs a new FusionDictionary from BigramDictionaryInterface. - */ - @UsedForTesting - static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict, - final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) { - final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(options)); - int profTotal = 0; - for (final String word1 : bigrams.keySet()) { - final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1); - for (final String word2 : word1Bigrams.keySet()) { - final int freq = dict.getFrequency(word1, word2); - if (freq == -1) { - // don't add this bigram. - continue; - } - if (DEBUG) { - if (word1 == null) { - Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq)); - } else { - Log.d(TAG, "add bigram: " + word1 - + "," + word2 + "," + Integer.toString(freq)); - } - profTotal++; - } - if (word1 == null) { // unigram - fusionDict.add(word2, freq, null, false /* isNotAWord */); - } else { // bigram - if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) { - fusionDict.add(word1, 2, null, false /* isNotAWord */); - } - fusionDict.setBigram(word1, word2, freq); - } - bigrams.updateBigram(word1, word2, (byte)freq); - } - } - if (DEBUG) { - Log.d(TAG, "add " + profTotal + "words"); - } - return fusionDict; - } - - /** - * Reads dictionary from file. - */ - public static void readDictionaryBinary(final DictDecoder dictDecoder, - final OnAddWordListener dict) { - final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap(); - final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); - final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); - try { - dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams); - } catch (IOException e) { - Log.e(TAG, "IO exception while reading file", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); - } catch (ArrayIndexOutOfBoundsException e) { - Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e); - } - addWordsFromWordMap(unigrams, frequencies, bigrams, dict); - } - - /** - * Adds all unigrams and bigrams in maps to OnAddWordListener. - */ - @UsedForTesting - static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams, - final TreeMap<Integer, Integer> frequencies, - final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams, - final OnAddWordListener to) { - for (Entry<Integer, String> entry : unigrams.entrySet()) { - final String word1 = entry.getValue(); - final int unigramFrequency = frequencies.get(entry.getKey()); - to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */); - final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey()); - if (attrList != null) { - for (final PendingAttribute attr : attrList) { - final String word2 = unigrams.get(attr.mAddress); - if (word1 == null || word2 == null) { - Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2); - continue; - } - to.setBigram(word1, word2, - BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency, - attr.mFrequency)); - } - } - } - - } -} diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java index ba9b114b0..fed5d3332 100644 --- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/utils/WordProperty.java @@ -41,7 +41,7 @@ public class WordProperty { // package. public static final class ProbabilityInfo { public final int mProbability; - // wTimestamp, mLevel and mCount are historical info. These values are depend on the + // mTimestamp, mLevel and mCount are historical info. These values are depend on the // implementation in native code; thus, we must not use them and have any assumptions about // them except for tests. public final int mTimestamp; @@ -54,6 +54,11 @@ public class WordProperty { mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX]; mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]; } + + @Override + public String toString() { + return mTimestamp + ":" + mLevel + ":" + mCount; + } } private static int getCodePointCount(final int[] codePoints) { @@ -105,4 +110,44 @@ public class WordProperty { public boolean isValid() { return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY; } + + @Override + public String toString() { + // TODO: Move this logic to CombinedInputOutput. + final StringBuffer builder = new StringBuffer(); + builder.append(" word=" + mCodePoints); + builder.append(","); + builder.append("f=" + mProbabilityInfo.mProbability); + if (mIsNotAWord) { + builder.append(","); + builder.append("not_a_word=true"); + } + if (mIsBlacklisted) { + builder.append(","); + builder.append("blacklisted=true"); + } + if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) { + builder.append(","); + builder.append("historicalInfo=" + mProbabilityInfo); + } + builder.append("\n"); + for (int i = 0; i < mBigramTargets.size(); i++) { + builder.append(" bigram=" + mBigramTargets.get(i).mWord); + builder.append(","); + builder.append("f=" + mBigramTargets.get(i).mFrequency); + if (mBigramProbabilityInfo.get(i).mTimestamp + != BinaryDictionary.NOT_A_VALID_TIMESTAMP) { + builder.append(","); + builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i)); + } + builder.append("\n"); + } + for (int i = 0; i < mShortcutTargets.size(); i++) { + builder.append(" shortcut=" + mShortcutTargets.get(i).mWord); + builder.append(","); + builder.append("f=" + mShortcutTargets.get(i).mFrequency); + builder.append("\n"); + } + return builder.toString(); + } }
\ No newline at end of file diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp index b918e0765..824d442e4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp @@ -28,6 +28,14 @@ const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 10000 const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; +bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode( + const PtNodeParams *const ptNodeParams) { + if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) { + mTerminalPositions->push_back(ptNodeParams->getHeadPos()); + } + return true; +} + // Visits all PtNodes in post-order depth first manner. // For example, visits c -> b -> y -> x -> a for the following dictionary: // a _ b _ c diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h index a69490943..bcc5c7857 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h @@ -59,6 +59,21 @@ class DynamicPtReadingHelper { DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); }; + class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener { + public: + TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions) + : mTerminalPositions(terminalPositions) {} + bool onAscend() { return true; } + bool onDescend(const int ptNodeArrayPos) { return true; } + bool onReadingPtNodeArrayTail() { return true; } + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions); + + std::vector<int> *const mTerminalPositions; + }; + DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer, const PtNodeReader *const ptNodeReader) : mIsError(false), mReadingState(), mBuffer(buffer), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 1c420e070..75d85988c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -392,10 +392,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code historicalInfo->getCount(), &bigrams, &shortcuts); } -int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, - int *const outCodePoints) { - // TODO: Implement. - return 0; +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { + if (token == 0) { + mTerminalPtNodePositionsForIteratingWords.clear(); + DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( + &mTerminalPtNodePositionsForIteratingWords); + DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); + } + const int terminalPtNodePositionsVectorSize = + static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); + if (token < 0 || token >= terminalPtNodePositionsVectorSize) { + AKLOGE("Given token %d is invalid.", token); + return 0; + } + const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; + int unigramProbability = NOT_A_PROBABILITY; + getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, + outCodePoints, &unigramProbability); + const int nextToken = token + 1; + if (nextToken >= terminalPtNodePositionsVectorSize) { + // All words have been iterated. + mTerminalPtNodePositionsForIteratingWords.clear(); + return 0; + } + return nextToken; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 1bcd4ceea..9ba5be0c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -17,6 +17,8 @@ #ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H #define LATINIME_VER4_PATRICIA_TRIE_POLICY_H +#include <vector> + #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" @@ -50,7 +52,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), - mBigramCount(mHeaderPolicy->getBigramCount()) {}; + mBigramCount(mHeaderPolicy->getBigramCount()), + mTerminalPtNodePositionsForIteratingWords() {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -134,6 +137,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4PatriciaTrieWritingHelper mWritingHelper; int mUnigramCount; int mBigramCount; + std::vector<int> mTerminalPtNodePositionsForIteratingWords; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index e39b46f94..bab86e546 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -971,6 +971,99 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } + public void testIterateAllWords() { + testIterateAllWords(FormatSpec.VERSION4); + } + + private void testIterateAllWords(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> wordProbabilitiesToCheckLater = + new HashMap<String, Integer>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = + new HashMap<Pair<String, String>, Integer>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilitiesToCheckLater.put(word, unigramProbability); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<String>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilitiesToCheckLater.put( + new Pair<String, String>(word0, word1), bigramProbability); + } + + final HashSet<String> wordSet = new HashSet<String>(words); + final HashSet<Pair<String, String>> bigramSet = + new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet()); + int token = 0; + do { + final BinaryDictionary.GetNextWordPropertyResult result = + binaryDictionary.getNextWordProperty(token); + final WordProperty wordProperty = result.mWordProperty; + final String word0 = wordProperty.mCodePoints; + assertEquals((int)wordProbabilitiesToCheckLater.get(word0), + wordProperty.mProbabilityInfo.mProbability); + wordSet.remove(word0); + final HashSet<String> bigramWord1s = bigrams.get(word0); + for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) { + final String word1 = wordProperty.mBigramTargets.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final int probability = wordProperty.mBigramTargets.get(j).mFrequency; + final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability); + bigramSet.remove(bigram); + } + token = result.mNextToken; + } while (token != 0); + assertTrue(wordSet.isEmpty()); + assertTrue(bigramSet.isEmpty()); + } + public void testAddShortcuts() { testAddShortcuts(FormatSpec.VERSION4); } diff --git a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java deleted file mode 100644 index 93731b3cd..000000000 --- a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import android.content.Context; -import android.test.AndroidTestCase; -import android.test.suitebuilder.annotation.LargeTest; -import android.util.Log; - -import com.android.inputmethod.latin.makedict.DictDecoder; -import com.android.inputmethod.latin.makedict.DictEncoder; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver2DictDecoder; -import com.android.inputmethod.latin.makedict.Ver2DictEncoder; -import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; - -/** - * Unit tests for UserHistoryDictIOUtils - */ -@LargeTest -public class UserHistoryDictIOUtilsTests extends AndroidTestCase - implements BigramDictionaryInterface { - - private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName(); - private static final int UNIGRAM_FREQUENCY = 50; - private static final int BIGRAM_FREQUENCY = 100; - private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>(); - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = new FormatSpec.FormatOptions(2); - private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; - private static final HashMap<String, String> HEADER_OPTIONS = new HashMap<String, String>(); - static { - HEADER_OPTIONS.put(FileHeader.DICTIONARY_LOCALE_KEY, "en_US"); - HEADER_OPTIONS.put(FileHeader.DICTIONARY_ID_KEY, "test"); - HEADER_OPTIONS.put(FileHeader.DICTIONARY_VERSION_KEY, "1000"); - } - - /** - * Return same frequency for all words and bigrams - */ - @Override - public int getFrequency(String word1, String word2) { - if (word1 == null) return UNIGRAM_FREQUENCY; - return BIGRAM_FREQUENCY; - } - - // Utilities for Testing - - private void addWord(final String word, - final HashMap<String, ArrayList<String> > addedWords) { - if (!addedWords.containsKey(word)) { - addedWords.put(word, new ArrayList<String>()); - } - } - - private void addBigram(final String word1, final String word2, - final HashMap<String, ArrayList<String> > addedWords) { - addWord(word1, addedWords); - addWord(word2, addedWords); - addedWords.get(word1).add(word2); - } - - private void addBigramToBigramList(final String word1, final String word2, - final HashMap<String, ArrayList<String> > addedWords, - final UserHistoryDictionaryBigramList bigramList) { - bigramList.addBigram(null, word1); - bigramList.addBigram(word1, word2); - - addBigram(word1, word2, addedWords); - } - - private void checkWordInFusionDict(final FusionDictionary dict, final String word, - final ArrayList<String> expectedBigrams) { - final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); - assertNotNull(ptNode); - assertTrue(ptNode.isTerminal()); - - for (final String bigram : expectedBigrams) { - assertNotNull(ptNode.getBigram(bigram)); - } - } - - private void checkWordsInFusionDict(final FusionDictionary dict, - final HashMap<String, ArrayList<String> > bigrams) { - for (final String word : bigrams.keySet()) { - if (bigrams.containsKey(word)) { - checkWordInFusionDict(dict, word, bigrams.get(word)); - } else { - checkWordInFusionDict(dict, word, NOT_HAVE_BIGRAM); - } - } - } - - private void checkWordInBigramList( - final UserHistoryDictionaryBigramList bigramList, final String word, - final ArrayList<String> expectedBigrams) { - // check unigram - final HashMap<String,Byte> unigramMap = bigramList.getBigrams(null); - assertTrue(unigramMap.containsKey(word)); - - // check bigrams - final ArrayList<String> actualBigrams = new ArrayList<String>( - bigramList.getBigrams(word).keySet()); - - Collections.sort(expectedBigrams); - Collections.sort(actualBigrams); - assertEquals(expectedBigrams, actualBigrams); - } - - private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList, - final HashMap<String, ArrayList<String> > addedWords) { - for (final String word : addedWords.keySet()) { - if (addedWords.containsKey(word)) { - checkWordInBigramList(bigramList, word, addedWords.get(word)); - } else { - checkWordInBigramList(bigramList, word, NOT_HAVE_BIGRAM); - } - } - } - - private void writeDictToFile(final File file, - final UserHistoryDictionaryBigramList bigramList) { - final DictEncoder dictEncoder = new Ver2DictEncoder(file); - UserHistoryDictIOUtils.writeDictionary(dictEncoder, this, bigramList, FORMAT_OPTIONS, - HEADER_OPTIONS); - } - - private void readDictFromFile(final File file, final OnAddWordListener listener) - throws IOException, FileNotFoundException, UnsupportedFormatException { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY); - dictDecoder.openDictBuffer(); - UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener); - } - - public void testGenerateFusionDictionary() { - final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList(); - - final HashMap<String, ArrayList<String> > addedWords = - new HashMap<String, ArrayList<String>>(); - addBigramToBigramList("this", "is", addedWords, originalList); - addBigramToBigramList("this", "was", addedWords, originalList); - addBigramToBigramList("hello", "world", addedWords, originalList); - - final FusionDictionary fusionDict = UserHistoryDictIOUtils.constructFusionDictionary( - this, originalList, HEADER_OPTIONS); - - checkWordsInFusionDict(fusionDict, addedWords); - } - - public void testReadAndWrite() throws IOException, FileNotFoundException, - UnsupportedFormatException { - final Context context = getContext(); - - File file = null; - try { - file = File.createTempFile("testReadAndWrite", TEST_DICT_FILE_EXTENSION, - getContext().getCacheDir()); - } catch (IOException e) { - Log.d(TAG, "IOException while creating a temporary file", e); - } - assertNotNull(file); - - // make original dictionary - final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList(); - final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap(); - addBigramToBigramList("this" , "is" , addedWords, originalList); - addBigramToBigramList("this" , "was" , addedWords, originalList); - addBigramToBigramList("is" , "not" , addedWords, originalList); - addBigramToBigramList("hello", "world", addedWords, originalList); - - // write to file - writeDictToFile(file, originalList); - - // make result dict. - final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList(); - final OnAddWordListener listener = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency, final int shortcutFreq) { - Log.d(TAG, "in: setUnigram: " + word + "," + frequency); - resultList.addBigram(null, word, (byte)frequency); - } - @Override - public void setBigram(final String word1, final String word2, final int frequency) { - Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency); - resultList.addBigram(word1, word2, (byte)frequency); - } - }; - - // load from file - readDictFromFile(file, listener); - checkWordsInBigramList(resultList, addedWords); - - // add new bigram - addBigramToBigramList("hello", "java", addedWords, resultList); - - // rewrite - writeDictToFile(file, resultList); - final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList(); - final OnAddWordListener listener2 = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency, final int shortcutFreq) { - Log.d(TAG, "in: setUnigram: " + word + "," + frequency); - resultList2.addBigram(null, word, (byte)frequency); - } - @Override - public void setBigram(final String word1, final String word2, final int frequency) { - Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency); - resultList2.addBigram(word1, word2, (byte)frequency); - } - }; - - // load from file - readDictFromFile(file, listener2); - checkWordsInBigramList(resultList2, addedWords); - } -} |