diff options
44 files changed, 747 insertions, 1436 deletions
diff --git a/java/res/xml/key_f1.xml b/java/res/xml/key_f1.xml index 455f9ef59..72e38cb1a 100644 --- a/java/res/xml/key_f1.xml +++ b/java/res/xml/key_f1.xml @@ -47,7 +47,7 @@ <Key latin:keyLabel="!text/keylabel_for_comma" latin:keyLabelFlags="hasPopupHint" - latin:additionalMoreKeys="!text/more_keys_for_comma" + latin:additionalMoreKeys="!text/more_keys_for_comma,!text/shortcut_as_more_key" latin:keyStyle="f1MoreKeysStyle" /> </default> </switch> diff --git a/java/res/xml/key_nepali_traditional_period.xml b/java/res/xml/key_nepali_traditional_period.xml index 0f575c50b..1c389b009 100644 --- a/java/res/xml/key_nepali_traditional_period.xml +++ b/java/res/xml/key_nepali_traditional_period.xml @@ -39,10 +39,11 @@ set of Key definitions are needed based on the API version. --> <include latin:keyboardLayout="@xml/keystyle_devanagari_sign_virama" /> + <!-- U+002E: "." FULL STOP --> <Key latin:keyStyle="baseKeyDevanagariSignVirama" latin:keyLabelFlags="hasPopupHint" - latin:moreKeys="!fixedColumnOrder!4,.,!text/more_keys_for_punctuation" + latin:moreKeys="!fixedColumnOrder!9,.,!text/more_keys_for_punctuation" latin:backgroundType="functional" /> </default> </switch> diff --git a/java/res/xml/row_dvorak4.xml b/java/res/xml/row_dvorak4.xml index 02a95acea..b78872fe4 100644 --- a/java/res/xml/row_dvorak4.xml +++ b/java/res/xml/row_dvorak4.xml @@ -27,42 +27,11 @@ <Key latin:keyStyle="toSymbolKeyStyle" latin:keyWidth="15%p" /> - <switch> - <case - latin:hasShortcutKey="true" - latin:keyboardLayoutSetElement="alphabet" - > - <Key - latin:keyLabel="q" - latin:backgroundType="normal" - latin:additionalMoreKeys="!text/shortcut_as_more_key" - latin:keyStyle="f1MoreKeysStyle" /> - </case> - <case - latin:hasShortcutKey="true" - > - <Key - latin:keyLabel="Q" - latin:backgroundType="normal" - latin:additionalMoreKeys="!text/shortcut_as_more_key" - latin:keyStyle="f1MoreKeysStyle" /> - </case> - <!-- latin:hasShortcutKey="false" --> - <case - latin:keyboardLayoutSetElement="alphabet" - 
> - <Key - latin:keyLabel="q" - latin:backgroundType="normal" - latin:keyStyle="f1MoreKeysStyle" /> - </case> - <default> - <Key - latin:keyLabel="Q" - latin:backgroundType="normal" - latin:keyStyle="f1MoreKeysStyle" /> - </default> - </switch> + <Key + latin:keyLabel="q" + latin:backgroundType="normal" + latin:additionalMoreKeys="!text/shortcut_as_more_key" + latin:keyStyle="f1MoreKeysStyle" /> <include latin:keyXPos="25%p" latin:keyboardLayout="@xml/key_space_5kw" /> diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java index e94edec02..b3975dc50 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java @@ -284,7 +284,7 @@ public final class KeyboardTextsSet { /* 56 */ "\u00A2,\u00A3,\u20AC,\u00A5,\u20B1", /* 57 */ "$", /* 58 */ "$,\u00A2,\u20AC,\u00A3,\u00A5,\u20B1", - /* 59 */ "!fixedColumnOrder!4,#,!,\\,,?,-,:,',@", + /* 59 */ "!fixedColumnOrder!8,;,/,(,),#,!,\\,,?,&,\\%,+,\",-,:,',@", // U+2020: "†" DAGGER // U+2021: "‡" DOUBLE DAGGER // U+2605: "★" BLACK STAR @@ -795,7 +795,7 @@ public final class KeyboardTextsSet { null, null, null, null, null, null, null, null, null, null, null, null, null, null, /* ~58 */ // U+00B7: "·" MIDDLE DOT - /* 59 */ "!fixedColumnOrder!4,\u00B7,!,\\,,?,:,;,@", + /* 59 */ "!fixedColumnOrder!9,;,/,(,),#,\u00B7,!,\\,,?,&,\\%,+,\",-,:,',@", /* 60~ */ null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, @@ -1981,7 +1981,7 @@ public final class KeyboardTextsSet { // U+20AA: "₪" NEW SHEQEL SIGN /* 57 */ "\u20AA", /* 58 */ null, - /* 59 */ null, + /* 59 */ "!fixedColumnOrder!8,;,/,(|),)|(,#,!,\\,,?,&,\\%,+,\",-,:,',@", // U+2605: "★" BLACK STAR /* 60 */ "\u2605", /* 61 */ null, diff --git 
a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 710c3eaac..d059cc8a9 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -23,7 +23,6 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.personalization.DynamicPersonalizationDictionaryWriter; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.utils.AsyncResultHolder; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -53,10 +52,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { /** Whether to print debug output to log */ private static boolean DEBUG = false; - // TODO: Remove. - /** Whether to call binary dictionary dynamically updating methods. 
*/ - public static final boolean ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE = true; - private static final int TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS = 100; /** @@ -164,11 +159,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private static AbstractDictionaryWriter getDictionaryWriter(final Context context, final String dictType, final boolean isDynamicPersonalizationDictionary) { if (isDynamicPersonalizationDictionary) { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - return null; - } else { - return new DynamicPersonalizationDictionaryWriter(context, dictType); - } + return null; } else { return new DictionaryWriter(context, dictType); } @@ -244,7 +235,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { getExecutor(mFilename).execute(new Runnable() { @Override public void run() { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE && mDictionaryWriter == null) { + if (mDictionaryWriter == null) { mBinaryDictionary.close(); final File file = new File(mContext.getFilesDir(), mFilename); BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), @@ -286,7 +277,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * Check whether GC is needed and run GC if required. */ protected void runGCIfRequired(final boolean mindsBlockByGC) { - if (!ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) return; getExecutor(mFilename).execute(new Runnable() { @Override public void run() { @@ -296,7 +286,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } private void runGCIfRequiredInternalLocked(final boolean mindsBlockByGC) { - if (!ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) return; // Calls to needsToRunGC() need to be serialized. 
if (mBinaryDictionary.needsToRunGC(mindsBlockByGC)) { if (setIsRegeneratingIfNotRegenerating()) { @@ -327,14 +316,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { getExecutor(mFilename).execute(new Runnable() { @Override public void run() { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - runGCIfRequiredInternalLocked(true /* mindsBlockByGC */); - mBinaryDictionary.addUnigramWord(word, frequency); - } else { - // TODO: Remove. - mDictionaryWriter.addUnigramWord(word, shortcutTarget, frequency, shortcutFreq, - isNotAWord); - } + runGCIfRequiredInternalLocked(true /* mindsBlockByGC */); + mBinaryDictionary.addUnigramWord(word, frequency); } }); } @@ -352,14 +335,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { getExecutor(mFilename).execute(new Runnable() { @Override public void run() { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - runGCIfRequiredInternalLocked(true /* mindsBlockByGC */); - mBinaryDictionary.addBigramWords(word0, word1, frequency); - } else { - // TODO: Remove. - mDictionaryWriter.addBigramWords(word0, word1, frequency, isValid, - 0 /* lastTouchedTime */); - } + runGCIfRequiredInternalLocked(true /* mindsBlockByGC */); + mBinaryDictionary.addBigramWords(word0, word1, frequency); } }); } @@ -376,13 +353,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { getExecutor(mFilename).execute(new Runnable() { @Override public void run() { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - runGCIfRequiredInternalLocked(true /* mindsBlockByGC */); - mBinaryDictionary.removeBigramWords(word0, word1); - } else { - // TODO: Remove. 
- mDictionaryWriter.removeBigramWords(word0, word1); - } + runGCIfRequiredInternalLocked(true /* mindsBlockByGC */); + mBinaryDictionary.removeBigramWords(word0, word1); } }); } @@ -396,46 +368,20 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (isRegenerating()) { return null; } - final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); final AsyncResultHolder<ArrayList<SuggestedWordInfo>> holder = new AsyncResultHolder<ArrayList<SuggestedWordInfo>>(); getExecutor(mFilename).executePrioritized(new Runnable() { @Override public void run() { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - if (mBinaryDictionary == null) { - holder.set(null); - return; - } - final ArrayList<SuggestedWordInfo> binarySuggestion = - mBinaryDictionary.getSuggestionsWithSessionId(composer, prevWord, - proximityInfo, blockOffensiveWords, additionalFeaturesOptions, - sessionId); - holder.set(binarySuggestion); - } else { - final ArrayList<SuggestedWordInfo> inMemDictSuggestion = - composer.isBatchMode() ? null : - mDictionaryWriter.getSuggestionsWithSessionId(composer, - prevWord, proximityInfo, blockOffensiveWords, - additionalFeaturesOptions, sessionId); - // TODO: Remove checking mIsUpdatable and use native suggestion. 
- if (mBinaryDictionary != null && !mIsUpdatable) { - final ArrayList<SuggestedWordInfo> binarySuggestion = - mBinaryDictionary.getSuggestionsWithSessionId(composer, prevWord, - proximityInfo, blockOffensiveWords, - additionalFeaturesOptions, sessionId); - if (inMemDictSuggestion == null) { - holder.set(binarySuggestion); - } else if (binarySuggestion == null) { - holder.set(inMemDictSuggestion); - } else { - binarySuggestion.addAll(inMemDictSuggestion); - holder.set(binarySuggestion); - } - } else { - holder.set(inMemDictSuggestion); - } + if (mBinaryDictionary == null) { + holder.set(null); + return; } + final ArrayList<SuggestedWordInfo> binarySuggestion = + mBinaryDictionary.getSuggestionsWithSessionId(composer, prevWord, + proximityInfo, blockOffensiveWords, additionalFeaturesOptions, + sessionId); + holder.set(binarySuggestion); } }); return holder.get(null, TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS); @@ -542,20 +488,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { loadDictionaryAsync(); mDictionaryWriter.write(mFilename, getHeaderAttributeMap()); } else { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - if (mBinaryDictionary == null || !mBinaryDictionary.isValidDictionary()) { - final File file = new File(mContext.getFilesDir(), mFilename); - BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), - DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); + if (mBinaryDictionary == null || !mBinaryDictionary.isValidDictionary()) { + final File file = new File(mContext.getFilesDir(), mFilename); + BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); + } else { + if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + mBinaryDictionary.flushWithGC(); } else { - if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { - mBinaryDictionary.flushWithGC(); - } else { - mBinaryDictionary.flush(); - } + mBinaryDictionary.flush(); } - } else { - 
mDictionaryWriter.write(mFilename, getHeaderAttributeMap()); } } } @@ -663,20 +605,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } /** - * Load the dictionary to memory. - */ - protected void asyncLoadDictionaryToMemory() { - getExecutor(mFilename).executePrioritized(new Runnable() { - @Override - public void run() { - if (!ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - loadDictionaryAsync(); - } - } - }); - } - - /** * Generate binary dictionary using DictionaryWriter. */ protected void asyncFlashAllBinaryDictionary() { @@ -704,7 +632,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } } - // TODO: Implement native binary methods once the dynamic dictionary implementation is done. + // TODO: Implement BinaryDictionary.isInDictionary(). @UsedForTesting public boolean isInDictionaryForTests(final String word) { final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>(); @@ -712,12 +640,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { @Override public void run() { if (mDictType == Dictionary.TYPE_USER_HISTORY) { - if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - holder.set(mBinaryDictionary.isValidWord(word)); - } else { - holder.set(((DynamicPersonalizationDictionaryWriter) mDictionaryWriter) - .isInBigramListForTests(word)); - } + holder.set(mBinaryDictionary.isValidWord(word)); } } }); diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java deleted file mode 100644 index 8fdff8f7e..000000000 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ /dev/null @@ -1,897 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin; - -import android.text.TextUtils; -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.utils.CollectionUtils; -import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams; - -import java.util.ArrayList; -import java.util.LinkedList; - -/** - * Class for an in-memory dictionary that can grow dynamically and can - * be searched for suggestions and valid words. - */ -// TODO: Remove after binary dictionary supports dynamic update. -@UsedForTesting -public class ExpandableDictionary extends Dictionary { - private static final String TAG = ExpandableDictionary.class.getSimpleName(); - /** - * The weight to give to a word if it's length is the same as the number of typed characters. 
- */ - private static final int FULL_WORD_SCORE_MULTIPLIER = 2; - - private char[] mWordBuilder = new char[Constants.DICTIONARY_MAX_WORD_LENGTH]; - private int mMaxDepth; - private int mInputLength; - - private static final class Node { - char mCode; - int mFrequency; - boolean mTerminal; - Node mParent; - NodeArray mChildren; - ArrayList<char[]> mShortcutTargets; - boolean mShortcutOnly; - LinkedList<NextWord> mNGrams; // Supports ngram - } - - private static final class NodeArray { - Node[] mData; - int mLength = 0; - private static final int INCREMENT = 2; - - NodeArray() { - mData = new Node[INCREMENT]; - } - - void add(final Node n) { - if (mLength + 1 > mData.length) { - Node[] tempData = new Node[mLength + INCREMENT]; - if (mLength > 0) { - System.arraycopy(mData, 0, tempData, 0, mLength); - } - mData = tempData; - } - mData[mLength++] = n; - } - } - - public interface NextWord { - public Node getWordNode(); - public int getFrequency(); - public ForgettingCurveParams getFcParams(); - public int notifyTypedAgainAndGetFrequency(); - } - - private static final class NextStaticWord implements NextWord { - public final Node mWord; - private final int mFrequency; - public NextStaticWord(Node word, int frequency) { - mWord = word; - mFrequency = frequency; - } - - @Override - public Node getWordNode() { - return mWord; - } - - @Override - public int getFrequency() { - return mFrequency; - } - - @Override - public ForgettingCurveParams getFcParams() { - return null; - } - - @Override - public int notifyTypedAgainAndGetFrequency() { - return mFrequency; - } - } - - private static final class NextHistoryWord implements NextWord { - public final Node mWord; - public final ForgettingCurveParams mFcp; - - public NextHistoryWord(Node word, ForgettingCurveParams fcp) { - mWord = word; - mFcp = fcp; - } - - @Override - public Node getWordNode() { - return mWord; - } - - @Override - public int getFrequency() { - return mFcp.getFrequency(); - } - - @Override - public 
ForgettingCurveParams getFcParams() { - return mFcp; - } - - @Override - public int notifyTypedAgainAndGetFrequency() { - return mFcp.notifyTypedAgainAndGetFrequency(); - } - } - - private NodeArray mRoots; - - private int[][] mCodes; - - @UsedForTesting - public ExpandableDictionary(final String dictType) { - super(dictType); - clearDictionary(); - mCodes = new int[Constants.DICTIONARY_MAX_WORD_LENGTH][]; - } - - public int getMaxWordLength() { - return Constants.DICTIONARY_MAX_WORD_LENGTH; - } - - /** - * Add a word with an optional shortcut to the dictionary. - * @param word The word to add. - * @param shortcutTarget A shortcut target for this word, or null if none. - * @param frequency The frequency for this unigram. - * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored - * if shortcutTarget is null. - */ - @UsedForTesting - public void addWord(final String word, final String shortcutTarget, final int frequency, - final int shortcutFreq) { - if (word.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) { - return; - } - addWordRec(mRoots, word, 0, shortcutTarget, frequency, shortcutFreq, null); - } - - /** - * Add a word, recursively searching for its correct place in the trie tree. - * @param children The node to recursively search for addition. Initially, the root of the tree. - * @param word The word to add. - * @param depth The current depth in the tree. - * @param shortcutTarget A shortcut target for this word, or null if none. - * @param frequency The frequency for this unigram. - * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored - * if shortcutTarget is null. - * @param parentNode The parent node, for up linking. Initially null, as the root has no parent. 
- */ - private void addWordRec(final NodeArray children, final String word, final int depth, - final String shortcutTarget, final int frequency, final int shortcutFreq, - final Node parentNode) { - final int wordLength = word.length(); - if (wordLength <= depth) return; - final char c = word.charAt(depth); - // Does children have the current character? - final int childrenLength = children.mLength; - Node childNode = null; - for (int i = 0; i < childrenLength; i++) { - final Node node = children.mData[i]; - if (node.mCode == c) { - childNode = node; - break; - } - } - final boolean isShortcutOnly = (null != shortcutTarget); - if (childNode == null) { - childNode = new Node(); - childNode.mCode = c; - childNode.mParent = parentNode; - childNode.mShortcutOnly = isShortcutOnly; - children.add(childNode); - } - if (wordLength == depth + 1) { - // Terminate this word - childNode.mTerminal = true; - if (isShortcutOnly) { - if (null == childNode.mShortcutTargets) { - childNode.mShortcutTargets = CollectionUtils.newArrayList(); - } - childNode.mShortcutTargets.add(shortcutTarget.toCharArray()); - } else { - childNode.mShortcutOnly = false; - } - childNode.mFrequency = Math.max(frequency, childNode.mFrequency); - if (childNode.mFrequency > 255) childNode.mFrequency = 255; - return; - } - if (childNode.mChildren == null) { - childNode.mChildren = new NodeArray(); - } - addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, shortcutFreq, - childNode); - } - - @Override - public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo, - final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) { - if (composer.size() > 1) { - if (composer.size() >= Constants.DICTIONARY_MAX_WORD_LENGTH) { - return null; - } - final ArrayList<SuggestedWordInfo> suggestions = - getWordsInner(composer, prevWord, proximityInfo); - return suggestions; - } else { - if 
(TextUtils.isEmpty(prevWord)) return null; - final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); - runBigramReverseLookUp(prevWord, suggestions); - return suggestions; - } - } - - private ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer codes, - final String prevWordForBigrams, final ProximityInfo proximityInfo) { - final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); - mInputLength = codes.size(); - if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; - final InputPointers ips = codes.getInputPointers(); - final int[] xCoordinates = ips.getXCoordinates(); - final int[] yCoordinates = ips.getYCoordinates(); - // Cache the codes so that we don't have to lookup an array list - for (int i = 0; i < mInputLength; i++) { - // TODO: Calculate proximity info here. - if (mCodes[i] == null || mCodes[i].length < 1) { - mCodes[i] = new int[ProximityInfo.MAX_PROXIMITY_CHARS_SIZE]; - } - final int x = xCoordinates != null && i < xCoordinates.length ? - xCoordinates[i] : Constants.NOT_A_COORDINATE; - final int y = xCoordinates != null && i < yCoordinates.length ? - yCoordinates[i] : Constants.NOT_A_COORDINATE; - proximityInfo.fillArrayWithNearestKeyCodes(x, y, codes.getCodeAt(i), mCodes[i]); - } - mMaxDepth = mInputLength * 3; - getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, suggestions); - for (int i = 0; i < mInputLength; i++) { - getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, suggestions); - } - return suggestions; - } - - @Override - public synchronized boolean isValidWord(final String word) { - final Node node = searchNode(mRoots, word, 0, word.length()); - // If node is null, we didn't find the word, so it's not valid. - // If node.mShortcutOnly is true, then it exists as a shortcut but not as a word, - // so that means it's not a valid word. 
- // If node.mShortcutOnly is false, then it exists as a word (it may also exist as - // a shortcut, but this does not matter), so it's a valid word. - return (node == null) ? false : !node.mShortcutOnly; - } - - public boolean removeBigram(final String word0, final String word1) { - // Refer to addOrSetBigram() about word1.toLowerCase() - final Node firstWord = searchWord(mRoots, word0.toLowerCase(), 0, null); - final Node secondWord = searchWord(mRoots, word1, 0, null); - LinkedList<NextWord> bigrams = firstWord.mNGrams; - NextWord bigramNode = null; - if (bigrams == null || bigrams.size() == 0) { - return false; - } else { - for (NextWord nw : bigrams) { - if (nw.getWordNode() == secondWord) { - bigramNode = nw; - break; - } - } - } - if (bigramNode == null) { - return false; - } - return bigrams.remove(bigramNode); - } - - /** - * Returns the word's frequency or -1 if not found - */ - @UsedForTesting - public int getWordFrequency(final String word) { - // Case-sensitive search - final Node node = searchNode(mRoots, word, 0, word.length()); - return (node == null) ? 
-1 : node.mFrequency; - } - - public NextWord getBigramWord(final String word0, final String word1) { - // Refer to addOrSetBigram() about word0.toLowerCase() - final Node firstWord = searchWord(mRoots, word0.toLowerCase(), 0, null); - final Node secondWord = searchWord(mRoots, word1, 0, null); - LinkedList<NextWord> bigrams = firstWord.mNGrams; - if (bigrams == null || bigrams.size() == 0) { - return null; - } else { - for (NextWord nw : bigrams) { - if (nw.getWordNode() == secondWord) { - return nw; - } - } - } - return null; - } - - private static int computeSkippedWordFinalFreq(final int freq, final int snr, - final int inputLength) { - // The computation itself makes sense for >= 2, but the == 2 case returns 0 - // anyway so we may as well test against 3 instead and return the constant - if (inputLength >= 3) { - return (freq * snr * (inputLength - 2)) / (inputLength - 1); - } else { - return 0; - } - } - - /** - * Helper method to add a word and its shortcuts. - * - * @param node the terminal node - * @param word the word to insert, as an array of code points - * @param depth the depth of the node in the tree - * @param finalFreq the frequency for this word - * @param suggestions the suggestion collection to add the suggestions to - * @return whether there is still space for more words. - */ - private boolean addWordAndShortcutsFromNode(final Node node, final char[] word, final int depth, - final int finalFreq, final ArrayList<SuggestedWordInfo> suggestions) { - if (finalFreq > 0 && !node.mShortcutOnly) { - // Use KIND_CORRECTION always. This dictionary does not really have a notion of - // COMPLETION against CORRECTION; we could artificially add one by looking at - // the respective size of the typed word and the suggestion if it matters sometime - // in the future. 
- suggestions.add(new SuggestedWordInfo(new String(word, 0, depth + 1), finalFreq, - SuggestedWordInfo.KIND_CORRECTION, this /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, - SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); - if (suggestions.size() >= Suggest.MAX_SUGGESTIONS) return false; - } - if (null != node.mShortcutTargets) { - final int length = node.mShortcutTargets.size(); - for (int shortcutIndex = 0; shortcutIndex < length; ++shortcutIndex) { - final char[] shortcut = node.mShortcutTargets.get(shortcutIndex); - suggestions.add(new SuggestedWordInfo(new String(shortcut, 0, shortcut.length), - finalFreq, SuggestedWordInfo.KIND_SHORTCUT, this /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, - SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); - if (suggestions.size() > Suggest.MAX_SUGGESTIONS) return false; - } - } - return true; - } - - /** - * Recursively traverse the tree for words that match the input. Input consists of - * a list of arrays. Each item in the list is one input character position. An input - * character is actually an array of multiple possible candidates. This function is not - * optimized for speed, assuming that the user dictionary will only be a few hundred words in - * size. - * @param roots node whose children have to be search for matches - * @param codes the input character codes - * @param word the word being composed as a possible match - * @param depth the depth of traversal - the length of the word being composed thus far - * @param completion whether the traversal is now in completion mode - meaning that we've - * exhausted the input and we're looking for all possible suffixes. - * @param snr current weight of the word being formed - * @param inputIndex position in the input characters. This can be off from the depth in - * case we skip over some punctuations such as apostrophe in the traversal. 
That is, if you type - * "wouldve", it could be matching "would've", so the depth will be one more than the - * inputIndex - * @param suggestions the list in which to add suggestions - */ - // TODO: Share this routine with the native code for BinaryDictionary - private void getWordsRec(final NodeArray roots, final WordComposer codes, final char[] word, - final int depth, final boolean completion, final int snr, final int inputIndex, - final int skipPos, final ArrayList<SuggestedWordInfo> suggestions) { - final int count = roots.mLength; - final int codeSize = mInputLength; - // Optimization: Prune out words that are too long compared to how much was typed. - if (depth > mMaxDepth) { - return; - } - final int[] currentChars; - if (codeSize <= inputIndex) { - currentChars = null; - } else { - currentChars = mCodes[inputIndex]; - } - - for (int i = 0; i < count; i++) { - final Node node = roots.mData[i]; - final char c = node.mCode; - final char lowerC = toLowerCase(c); - final boolean terminal = node.mTerminal; - final NodeArray children = node.mChildren; - final int freq = node.mFrequency; - if (completion || currentChars == null) { - word[depth] = c; - if (terminal) { - final int finalFreq; - if (skipPos < 0) { - finalFreq = freq * snr; - } else { - finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength); - } - if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, suggestions)) { - // No space left in the queue, bail out - return; - } - } - if (children != null) { - getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex, - skipPos, suggestions); - } - } else if ((c == Constants.CODE_SINGLE_QUOTE - && currentChars[0] != Constants.CODE_SINGLE_QUOTE) || depth == skipPos) { - // Skip the ' and continue deeper - word[depth] = c; - if (children != null) { - getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, - skipPos, suggestions); - } - } else { - // Don't use alternatives if we're looking for missing characters - 
final int alternativesSize = skipPos >= 0 ? 1 : currentChars.length; - for (int j = 0; j < alternativesSize; j++) { - final int addedAttenuation = (j > 0 ? 1 : 2); - final int currentChar = currentChars[j]; - if (currentChar == Constants.NOT_A_CODE) { - break; - } - if (currentChar == lowerC || currentChar == c) { - word[depth] = c; - - if (codeSize == inputIndex + 1) { - if (terminal) { - final int finalFreq; - if (skipPos < 0) { - finalFreq = freq * snr * addedAttenuation - * FULL_WORD_SCORE_MULTIPLIER; - } else { - finalFreq = computeSkippedWordFinalFreq(freq, - snr * addedAttenuation, mInputLength); - } - if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, - suggestions)) { - // No space left in the queue, bail out - return; - } - } - if (children != null) { - getWordsRec(children, codes, word, depth + 1, - true, snr * addedAttenuation, inputIndex + 1, - skipPos, suggestions); - } - } else if (children != null) { - getWordsRec(children, codes, word, depth + 1, - false, snr * addedAttenuation, inputIndex + 1, - skipPos, suggestions); - } - } - } - } - } - } - - public int setBigramAndGetFrequency(final String word0, final String word1, - final int frequency) { - return setBigramAndGetFrequency(word0, word1, frequency, null /* unused */); - } - - public int setBigramAndGetFrequency(final String word0, final String word1, - final ForgettingCurveParams fcp) { - return setBigramAndGetFrequency(word0, word1, 0 /* unused */, fcp); - } - - /** - * Adds bigrams to the in-memory trie structure that is being used to retrieve any word - * @param word0 the first word of this bigram - * @param word1 the second word of this bigram - * @param frequency frequency for this bigram - * @param fcp an instance of ForgettingCurveParams to use for decay policy - * @return returns the final bigram frequency - */ - private int setBigramAndGetFrequency(final String word0, final String word1, - final int frequency, final ForgettingCurveParams fcp) { - if 
(TextUtils.isEmpty(word0)) { - Log.e(TAG, "Invalid bigram previous word: " + word0); - return frequency; - } - // We don't want results to be different according to case of the looked up left hand side - // word. We do want however to return the correct case for the right hand side. - // So we want to squash the case of the left hand side, and preserve that of the right - // hand side word. - final String word0Lower = word0.toLowerCase(); - if (TextUtils.isEmpty(word0Lower) || TextUtils.isEmpty(word1)) { - Log.e(TAG, "Invalid bigram pair: " + word0 + ", " + word0Lower + ", " + word1); - return frequency; - } - final Node firstWord = searchWord(mRoots, word0Lower, 0, null); - final Node secondWord = searchWord(mRoots, word1, 0, null); - LinkedList<NextWord> bigrams = firstWord.mNGrams; - if (bigrams == null || bigrams.size() == 0) { - firstWord.mNGrams = CollectionUtils.newLinkedList(); - bigrams = firstWord.mNGrams; - } else { - for (NextWord nw : bigrams) { - if (nw.getWordNode() == secondWord) { - return nw.notifyTypedAgainAndGetFrequency(); - } - } - } - if (fcp != null) { - // history - firstWord.mNGrams.add(new NextHistoryWord(secondWord, fcp)); - } else { - firstWord.mNGrams.add(new NextStaticWord(secondWord, frequency)); - } - return frequency; - } - - /** - * Searches for the word and add the word if it does not exist. - * @return Returns the terminal node of the word we are searching for. - */ - private Node searchWord(final NodeArray children, final String word, final int depth, - final Node parentNode) { - final int wordLength = word.length(); - final char c = word.charAt(depth); - // Does children have the current character? 
- final int childrenLength = children.mLength; - Node childNode = null; - for (int i = 0; i < childrenLength; i++) { - final Node node = children.mData[i]; - if (node.mCode == c) { - childNode = node; - break; - } - } - if (childNode == null) { - childNode = new Node(); - childNode.mCode = c; - childNode.mParent = parentNode; - children.add(childNode); - } - if (wordLength == depth + 1) { - // Terminate this word - childNode.mTerminal = true; - return childNode; - } - if (childNode.mChildren == null) { - childNode.mChildren = new NodeArray(); - } - return searchWord(childNode.mChildren, word, depth + 1, childNode); - } - - private void runBigramReverseLookUp(final String previousWord, - final ArrayList<SuggestedWordInfo> suggestions) { - // Search for the lowercase version of the word only, because that's where bigrams - // store their sons. - final Node prevWord = searchNode(mRoots, previousWord.toLowerCase(), 0, - previousWord.length()); - if (prevWord != null && prevWord.mNGrams != null) { - reverseLookUp(prevWord.mNGrams, suggestions); - } - } - - // Local to reverseLookUp, but do not allocate each time. - private final char[] mLookedUpString = new char[Constants.DICTIONARY_MAX_WORD_LENGTH]; - - /** - * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words - * to the suggestions list passed as an argument. 
- * @param terminalNodes list of terminal nodes we want to add - * @param suggestions the suggestion collection to add the word to - */ - private void reverseLookUp(final LinkedList<NextWord> terminalNodes, - final ArrayList<SuggestedWordInfo> suggestions) { - Node node; - int freq; - for (NextWord nextWord : terminalNodes) { - node = nextWord.getWordNode(); - freq = nextWord.getFrequency(); - int index = Constants.DICTIONARY_MAX_WORD_LENGTH; - do { - --index; - mLookedUpString[index] = node.mCode; - node = node.mParent; - } while (node != null && index > 0); - - // If node is null, we have a word longer than MAX_WORD_LENGTH in the dictionary. - // It's a little unclear how this can happen, but just in case it does it's safer - // to ignore the word in this case. - if (freq >= 0 && node == null) { - suggestions.add(new SuggestedWordInfo(new String(mLookedUpString, index, - Constants.DICTIONARY_MAX_WORD_LENGTH - index), - freq, SuggestedWordInfo.KIND_CORRECTION, this /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, - SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); - } - } - } - - /** - * Recursively search for the terminal node of the word. - * - * One iteration takes the full word to search for and the current index of the recursion. - * - * @param children the node of the trie to search under. - * @param word the word to search for. Only read [offset..length] so there may be trailing chars - * @param offset the index in {@code word} this recursion should operate on. - * @param length the length of the input word. 
- * @return Returns the terminal node of the word if the word exists - */ - private Node searchNode(final NodeArray children, final CharSequence word, final int offset, - final int length) { - final int count = children.mLength; - final char currentChar = word.charAt(offset); - for (int j = 0; j < count; j++) { - final Node node = children.mData[j]; - if (node.mCode == currentChar) { - if (offset == length - 1) { - if (node.mTerminal) { - return node; - } - } else { - if (node.mChildren != null) { - Node returnNode = searchNode(node.mChildren, word, offset + 1, length); - if (returnNode != null) return returnNode; - } - } - } - } - return null; - } - - public void clearDictionary() { - mRoots = new NodeArray(); - } - - private static char toLowerCase(final char c) { - char baseChar = c; - if (c < BASE_CHARS.length) { - baseChar = BASE_CHARS[c]; - } - if (baseChar >= 'A' && baseChar <= 'Z') { - return (char)(baseChar | 32); - } else if (baseChar > 127) { - return Character.toLowerCase(baseChar); - } - return baseChar; - } - - /** - * Table mapping most combined Latin, Greek, and Cyrillic characters - * to their base characters. If c is in range, BASE_CHARS[c] == c - * if c is not a combined character, or the base character if it - * is combined. - * - * cf. 
native/jni/src/utils/char_utils.cpp - */ - private static final char BASE_CHARS[] = { - /* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - /* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, - /* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - /* U+0018 */ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, - /* U+0020 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - /* U+0028 */ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, - /* U+0030 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - /* U+0038 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, - /* U+0040 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - /* U+0048 */ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, - /* U+0050 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - /* U+0058 */ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, - /* U+0060 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - /* U+0068 */ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, - /* U+0070 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - /* U+0078 */ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, - /* U+0080 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - /* U+0088 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - /* U+0090 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - /* U+0098 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - /* U+00A0 */ 0x0020, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - /* U+00A8 */ 0x0020, 0x00A9, 0x0061, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0020, - /* U+00B0 */ 0x00B0, 0x00B1, 0x0032, 0x0033, 0x0020, 0x03BC, 0x00B6, 0x00B7, - /* U+00B8 */ 0x0020, 0x0031, 0x006F, 0x00BB, 0x0031, 0x0031, 0x0033, 0x00BF, - /* U+00C0 */ 
0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, - /* U+00C8 */ 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, - /* U+00D0 */ 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7, - /* U+00D8 */ 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0073, - // U+00D8: Manually changed from 00D8 to 004F - // TODO: Check if it's really acceptable to consider Ø a diacritical variant of O - // U+00DF: Manually changed from 00DF to 0073 - /* U+00E0 */ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00E6, 0x0063, - /* U+00E8 */ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, - /* U+00F0 */ 0x00F0, 0x006E, 0x006F, 0x006F, 0x006F, 0x006F, 0x006F, 0x00F7, - /* U+00F8 */ 0x006F, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00FE, 0x0079, - // U+00F8: Manually changed from 00F8 to 006F - // TODO: Check if it's really acceptable to consider ø a diacritical variant of o - /* U+0100 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, - /* U+0108 */ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, - /* U+0110 */ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, - /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, - /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, - /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, - /* U+0130 */ 0x0049, 0x0131, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B, - /* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, - /* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, - // U+0141: Manually changed from 0141 to 004C - // U+0142: Manually changed from 0142 to 006C - /* U+0148 */ 0x006E, 0x02BC, 0x014A, 0x014B, 0x004F, 0x006F, 0x004F, 0x006F, - /* U+0150 */ 0x004F, 0x006F, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, - /* U+0158 */ 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, - /* U+0160 */ 0x0053, 
0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, - /* U+0168 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, - /* U+0170 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, - /* U+0178 */ 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073, - /* U+0180 */ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, - /* U+0188 */ 0x0188, 0x0189, 0x018A, 0x018B, 0x018C, 0x018D, 0x018E, 0x018F, - /* U+0190 */ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, - /* U+0198 */ 0x0198, 0x0199, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F, - /* U+01A0 */ 0x004F, 0x006F, 0x01A2, 0x01A3, 0x01A4, 0x01A5, 0x01A6, 0x01A7, - /* U+01A8 */ 0x01A8, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AD, 0x01AE, 0x0055, - /* U+01B0 */ 0x0075, 0x01B1, 0x01B2, 0x01B3, 0x01B4, 0x01B5, 0x01B6, 0x01B7, - /* U+01B8 */ 0x01B8, 0x01B9, 0x01BA, 0x01BB, 0x01BC, 0x01BD, 0x01BE, 0x01BF, - /* U+01C0 */ 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x0044, 0x0044, 0x0064, 0x004C, - /* U+01C8 */ 0x004C, 0x006C, 0x004E, 0x004E, 0x006E, 0x0041, 0x0061, 0x0049, - /* U+01D0 */ 0x0069, 0x004F, 0x006F, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, - // U+01D5: Manually changed from 00DC to 0055 - // U+01D6: Manually changed from 00FC to 0075 - // U+01D7: Manually changed from 00DC to 0055 - /* U+01D8 */ 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x01DD, 0x0041, 0x0061, - // U+01D8: Manually changed from 00FC to 0075 - // U+01D9: Manually changed from 00DC to 0055 - // U+01DA: Manually changed from 00FC to 0075 - // U+01DB: Manually changed from 00DC to 0055 - // U+01DC: Manually changed from 00FC to 0075 - // U+01DE: Manually changed from 00C4 to 0041 - // U+01DF: Manually changed from 00E4 to 0061 - /* U+01E0 */ 0x0041, 0x0061, 0x00C6, 0x00E6, 0x01E4, 0x01E5, 0x0047, 0x0067, - // U+01E0: Manually changed from 0226 to 0041 - // U+01E1: Manually changed from 0227 to 0061 - /* U+01E8 */ 0x004B, 0x006B, 0x004F, 0x006F, 0x004F, 0x006F, 0x01B7, 0x0292, - // U+01EC: Manually 
changed from 01EA to 004F - // U+01ED: Manually changed from 01EB to 006F - /* U+01F0 */ 0x006A, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01F6, 0x01F7, - /* U+01F8 */ 0x004E, 0x006E, 0x0041, 0x0061, 0x00C6, 0x00E6, 0x004F, 0x006F, - // U+01FA: Manually changed from 00C5 to 0041 - // U+01FB: Manually changed from 00E5 to 0061 - // U+01FE: Manually changed from 00D8 to 004F - // TODO: Check if it's really acceptable to consider Ø a diacritical variant of O - // U+01FF: Manually changed from 00F8 to 006F - // TODO: Check if it's really acceptable to consider ø a diacritical variant of o - /* U+0200 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, - /* U+0208 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x004F, 0x006F, 0x004F, 0x006F, - /* U+0210 */ 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, - /* U+0218 */ 0x0053, 0x0073, 0x0054, 0x0074, 0x021C, 0x021D, 0x0048, 0x0068, - /* U+0220 */ 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, - /* U+0228 */ 0x0045, 0x0065, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, - // U+022A: Manually changed from 00D6 to 004F - // U+022B: Manually changed from 00F6 to 006F - // U+022C: Manually changed from 00D5 to 004F - // U+022D: Manually changed from 00F5 to 006F - /* U+0230 */ 0x004F, 0x006F, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, - // U+0230: Manually changed from 022E to 004F - // U+0231: Manually changed from 022F to 006F - /* U+0238 */ 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F, - /* U+0240 */ 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, - /* U+0248 */ 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F, - /* U+0250 */ 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, - /* U+0258 */ 0x0258, 0x0259, 0x025A, 0x025B, 0x025C, 0x025D, 0x025E, 0x025F, - /* U+0260 */ 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, - /* U+0268 */ 0x0268, 0x0269, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x026F, - /* U+0270 
*/ 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, - /* U+0278 */ 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F, - /* U+0280 */ 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, - /* U+0288 */ 0x0288, 0x0289, 0x028A, 0x028B, 0x028C, 0x028D, 0x028E, 0x028F, - /* U+0290 */ 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, - /* U+0298 */ 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F, - /* U+02A0 */ 0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7, - /* U+02A8 */ 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF, - /* U+02B0 */ 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, - /* U+02B8 */ 0x0079, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF, - /* U+02C0 */ 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, - /* U+02C8 */ 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF, - /* U+02D0 */ 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, - /* U+02D8 */ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02DE, 0x02DF, - /* U+02E0 */ 0x0263, 0x006C, 0x0073, 0x0078, 0x0295, 0x02E5, 0x02E6, 0x02E7, - /* U+02E8 */ 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF, - /* U+02F0 */ 0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7, - /* U+02F8 */ 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF, - /* U+0300 */ 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, - /* U+0308 */ 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F, - /* U+0310 */ 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, - /* U+0318 */ 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F, - /* U+0320 */ 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, - /* U+0328 */ 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F, - /* U+0330 */ 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, - /* U+0338 */ 0x0338, 0x0339, 
0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F, - /* U+0340 */ 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, - /* U+0348 */ 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F, - /* U+0350 */ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, - /* U+0358 */ 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F, - /* U+0360 */ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, - /* U+0368 */ 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F, - /* U+0370 */ 0x0370, 0x0371, 0x0372, 0x0373, 0x02B9, 0x0375, 0x0376, 0x0377, - /* U+0378 */ 0x0378, 0x0379, 0x0020, 0x037B, 0x037C, 0x037D, 0x003B, 0x037F, - /* U+0380 */ 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00A8, 0x0391, 0x00B7, - /* U+0388 */ 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9, - /* U+0390 */ 0x03CA, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, - /* U+0398 */ 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, - /* U+03A0 */ 0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, - /* U+03A8 */ 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x03B1, 0x03B5, 0x03B7, 0x03B9, - /* U+03B0 */ 0x03CB, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, - /* U+03B8 */ 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, - /* U+03C0 */ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, - /* U+03C8 */ 0x03C8, 0x03C9, 0x03B9, 0x03C5, 0x03BF, 0x03C5, 0x03C9, 0x03CF, - /* U+03D0 */ 0x03B2, 0x03B8, 0x03A5, 0x03D2, 0x03D2, 0x03C6, 0x03C0, 0x03D7, - /* U+03D8 */ 0x03D8, 0x03D9, 0x03DA, 0x03DB, 0x03DC, 0x03DD, 0x03DE, 0x03DF, - /* U+03E0 */ 0x03E0, 0x03E1, 0x03E2, 0x03E3, 0x03E4, 0x03E5, 0x03E6, 0x03E7, - /* U+03E8 */ 0x03E8, 0x03E9, 0x03EA, 0x03EB, 0x03EC, 0x03ED, 0x03EE, 0x03EF, - /* U+03F0 */ 0x03BA, 0x03C1, 0x03C2, 0x03F3, 0x0398, 0x03B5, 0x03F6, 0x03F7, - /* U+03F8 */ 0x03F8, 0x03A3, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF, - /* U+0400 */ 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 
0x0405, 0x0406, 0x0406, - /* U+0408 */ 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F, - /* U+0410 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, - /* U+0418 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, - // U+0419: Manually changed from 0418 to 0419 - /* U+0420 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, - /* U+0428 */ 0x0428, 0x0429, 0x042C, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, - // U+042A: Manually changed from 042A to 042C - /* U+0430 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, - /* U+0438 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, - // U+0439: Manually changed from 0438 to 0439 - /* U+0440 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, - /* U+0448 */ 0x0448, 0x0449, 0x044C, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, - // U+044A: Manually changed from 044A to 044C - /* U+0450 */ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, - /* U+0458 */ 0x0458, 0x0459, 0x045A, 0x045B, 0x043A, 0x0438, 0x0443, 0x045F, - /* U+0460 */ 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, - /* U+0468 */ 0x0468, 0x0469, 0x046A, 0x046B, 0x046C, 0x046D, 0x046E, 0x046F, - /* U+0470 */ 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, - /* U+0478 */ 0x0478, 0x0479, 0x047A, 0x047B, 0x047C, 0x047D, 0x047E, 0x047F, - /* U+0480 */ 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, - /* U+0488 */ 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048D, 0x048E, 0x048F, - /* U+0490 */ 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, - /* U+0498 */ 0x0498, 0x0499, 0x049A, 0x049B, 0x049C, 0x049D, 0x049E, 0x049F, - /* U+04A0 */ 0x04A0, 0x04A1, 0x04A2, 0x04A3, 0x04A4, 0x04A5, 0x04A6, 0x04A7, - /* U+04A8 */ 0x04A8, 0x04A9, 0x04AA, 0x04AB, 0x04AC, 0x04AD, 0x04AE, 0x04AF, - /* U+04B0 */ 0x04B0, 0x04B1, 0x04B2, 0x04B3, 0x04B4, 0x04B5, 0x04B6, 0x04B7, - /* U+04B8 */ 0x04B8, 0x04B9, 0x04BA, 
0x04BB, 0x04BC, 0x04BD, 0x04BE, 0x04BF, - /* U+04C0 */ 0x04C0, 0x0416, 0x0436, 0x04C3, 0x04C4, 0x04C5, 0x04C6, 0x04C7, - /* U+04C8 */ 0x04C8, 0x04C9, 0x04CA, 0x04CB, 0x04CC, 0x04CD, 0x04CE, 0x04CF, - /* U+04D0 */ 0x0410, 0x0430, 0x0410, 0x0430, 0x04D4, 0x04D5, 0x0415, 0x0435, - /* U+04D8 */ 0x04D8, 0x04D9, 0x04D8, 0x04D9, 0x0416, 0x0436, 0x0417, 0x0437, - /* U+04E0 */ 0x04E0, 0x04E1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041E, 0x043E, - /* U+04E8 */ 0x04E8, 0x04E9, 0x04E8, 0x04E9, 0x042D, 0x044D, 0x0423, 0x0443, - /* U+04F0 */ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04F6, 0x04F7, - /* U+04F8 */ 0x042B, 0x044B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF, - }; -} diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index d33e0c5a6..326c53f0e 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -909,6 +909,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen false /* shouldFinishComposition */)) { // We try resetting the caches up to 5 times before giving up. 
mHandler.postResetCaches(isDifferentTextField, 5 /* remainingTries */); + // mLastSelection{Start,End} are reset later in this method, don't need to do it here canReachInputConnection = false; } else { if (isDifferentTextField) { @@ -988,10 +989,16 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen if (textLength > mLastSelectionStart || (textLength < Constants.EDITOR_CONTENTS_CACHE_SIZE && mLastSelectionStart < Constants.EDITOR_CONTENTS_CACHE_SIZE)) { + // It should not be possible to have only one of those variables be + // NOT_A_CURSOR_POSITION, so if they are equal, either the selection is zero-sized + // (simple cursor, no selection) or there is no cursor/we don't know its pos + final boolean wasEqual = mLastSelectionStart == mLastSelectionEnd; mLastSelectionStart = textLength; // We can't figure out the value of mLastSelectionEnd :( - // But at least if it's smaller than mLastSelectionStart something is wrong - if (mLastSelectionStart > mLastSelectionEnd) { + // But at least if it's smaller than mLastSelectionStart something is wrong, + // and if they used to be equal we also don't want to make it look like there is a + // selection. + if (wasEqual || mLastSelectionStart > mLastSelectionEnd) { mLastSelectionEnd = mLastSelectionStart; } } diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index c212f9c81..673d1b4c2 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -61,7 +61,7 @@ public final class RichInputConnection { * cursor may end up after all the keyboard-triggered updates have passed. We keep this to * compare it to the actual cursor position to guess whether the move was caused by a * keyboard command or not. 
- * It's not really the cursor position: the cursor may not be there yet, and it's also expected + * It's not really the cursor position: the cursor may not be there yet, and it's also expected * there be cases where it never actually comes to be there. */ private int mExpectedCursorPosition = INVALID_CURSOR_POSITION; // in chars, not code points @@ -292,7 +292,11 @@ public final class RichInputConnection { mCommittedTextBeforeComposingText.length() + mComposingText.length(); // If we have enough characters to satisfy the request, or if we have all characters in // the text field, then we can return the cached version right away. - if (cachedLength >= n || cachedLength >= mExpectedCursorPosition) { + // However, if we don't have an expected cursor position, then we should always + // go fetch the cache again (as it happens, INVALID_CURSOR_POSITION < 0, so we need to + // test for this explicitly) + if (INVALID_CURSOR_POSITION != mExpectedCursorPosition + && (cachedLength >= n || cachedLength >= mExpectedCursorPosition)) { final StringBuilder s = new StringBuilder(mCommittedTextBeforeComposingText); // We call #toString() here to create a temporary object. 
// In some situations, this method is called on a worker thread, and it's possible diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index c817d3eb5..e7a25d216 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -76,7 +76,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB mFileName = fileName; mPrefs = sp; if (mLocale != null && mLocale.length() > 1) { - asyncLoadDictionaryToMemory(); reloadDictionaryIfRequired(); } } @@ -86,9 +85,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB if (DBG_DUMP_ON_CLOSE) { dumpAllWordsForDebug(); } - if (!ExpandableBinaryDictionary.ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { - closeBinaryDictionary(); - } // Flush pending writes. // TODO: Remove after this class become to use a dynamic binary dictionary. asyncFlashAllBinaryDictionary(); @@ -130,9 +126,8 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB (word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) { return; } - final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ? - (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) : - FREQUENCY_FOR_TYPED; + final int frequency = isValid ? 
+ FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS; addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */, false /* isNotAWord */); // Do not insert a word as a bigram of itself diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java deleted file mode 100644 index 6f152bb91..000000000 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.android.inputmethod.latin.personalization; - -import android.content.Context; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.compat.ActivityManagerCompatUtils; -import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.AbstractDictionaryWriter; -import com.android.inputmethod.latin.ExpandableDictionary; -import com.android.inputmethod.latin.WordComposer; -import com.android.inputmethod.latin.ExpandableDictionary.NextWord; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.makedict.DictEncoder; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface; -import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils; -import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Map; - -// Currently this class is used to implement dynamic prodiction dictionary. -// TODO: Move to native code. -public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWriter { - private static final String TAG = DynamicPersonalizationDictionaryWriter.class.getSimpleName(); - /** Maximum number of pairs. Pruning will start when databases goes above this number. 
*/ - public static final int DEFAULT_MAX_HISTORY_BIGRAMS = 10000; - public static final int LOW_MEMORY_MAX_HISTORY_BIGRAMS = 2000; - - /** Any pair being typed or picked */ - private static final int FREQUENCY_FOR_TYPED = 2; - - private static final int BINARY_DICT_VERSION = 3; - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(BINARY_DICT_VERSION, true /* supportsDynamicUpdate */); - - private final UserHistoryDictionaryBigramList mBigramList = - new UserHistoryDictionaryBigramList(); - private final ExpandableDictionary mExpandableDictionary; - private final int mMaxHistoryBigrams; - - public DynamicPersonalizationDictionaryWriter(final Context context, final String dictType) { - super(context, dictType); - mExpandableDictionary = new ExpandableDictionary(dictType); - final boolean isLowRamDevice = ActivityManagerCompatUtils.isLowRamDevice(context); - mMaxHistoryBigrams = isLowRamDevice ? - LOW_MEMORY_MAX_HISTORY_BIGRAMS : DEFAULT_MAX_HISTORY_BIGRAMS; - } - - @Override - public void clear() { - mBigramList.evictAll(); - mExpandableDictionary.clearDictionary(); - } - - /** - * Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes - * are done to update the binary dictionary. - * @param word The word to add. - * @param shortcutTarget A shortcut target for this word, or null if none. - * @param frequency The frequency for this unigram. - * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). Ignored - * if shortcutTarget is null. - * @param isNotAWord true if this is not a word, i.e. shortcut only. - */ - @Override - public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, - final int shortcutFreq, final boolean isNotAWord) { - if (mBigramList.size() > mMaxHistoryBigrams * 2) { - // Too many entries: just stop adding new vocabulary and wait next refresh. 
- return; - } - mExpandableDictionary.addWord(word, shortcutTarget, frequency, shortcutFreq); - mBigramList.addBigram(null, word, (byte)frequency); - } - - @Override - public void addBigramWords(final String word0, final String word1, final int frequency, - final boolean isValid, final long lastModifiedTime) { - if (mBigramList.size() > mMaxHistoryBigrams * 2) { - // Too many entries: just stop adding new vocabulary and wait next refresh. - return; - } - if (lastModifiedTime > 0) { - mExpandableDictionary.setBigramAndGetFrequency(word0, word1, - new ForgettingCurveParams(frequency, System.currentTimeMillis(), - lastModifiedTime)); - mBigramList.addBigram(word0, word1, (byte)frequency); - } else { - mExpandableDictionary.setBigramAndGetFrequency(word0, word1, - new ForgettingCurveParams(isValid)); - mBigramList.addBigram(word0, word1, (byte)frequency); - } - } - - @Override - public void removeBigramWords(final String word0, final String word1) { - if (mBigramList.removeBigram(word0, word1)) { - mExpandableDictionary.removeBigram(word0, word1); - } - } - - @Override - protected void writeDictionary(final DictEncoder dictEncoder, - final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException { - UserHistoryDictIOUtils.writeDictionary(dictEncoder, - new FrequencyProvider(mBigramList, mExpandableDictionary, mMaxHistoryBigrams), - mBigramList, FORMAT_OPTIONS); - } - - private static class FrequencyProvider implements BigramDictionaryInterface { - private final UserHistoryDictionaryBigramList mBigramList; - private final ExpandableDictionary mExpandableDictionary; - private final int mMaxHistoryBigrams; - - public FrequencyProvider(final UserHistoryDictionaryBigramList bigramList, - final ExpandableDictionary expandableDictionary, final int maxHistoryBigrams) { - mBigramList = bigramList; - mExpandableDictionary = expandableDictionary; - mMaxHistoryBigrams = maxHistoryBigrams; - } - - @Override - public int getFrequency(final String word0, final 
String word1) { - final int freq; - if (word0 == null) { // unigram - freq = FREQUENCY_FOR_TYPED; - } else { // bigram - final NextWord nw = mExpandableDictionary.getBigramWord(word0, word1); - if (nw != null) { - final ForgettingCurveParams forgettingCurveParams = nw.getFcParams(); - final byte prevFc = mBigramList.getBigrams(word0).get(word1); - final byte fc = forgettingCurveParams.getFc(); - final boolean isValid = forgettingCurveParams.isValid(); - if (prevFc > 0 && prevFc == fc) { - freq = fc & 0xFF; - } else if (UserHistoryForgettingCurveUtils. - needsToSave(fc, isValid, mBigramList.size() <= mMaxHistoryBigrams)) { - freq = fc & 0xFF; - } else { - // Delete this entry - freq = -1; - } - } else { - // Delete this entry - freq = -1; - } - } - return freq; - } - } - - @Override - public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo, - boolean blockOffensiveWords, final int[] additionalFeaturesOptions) { - return mExpandableDictionary.getSuggestions(composer, prevWord, proximityInfo, - blockOffensiveWords, additionalFeaturesOptions); - } - - @Override - public boolean isValidWord(final String word) { - return mExpandableDictionary.isValidWord(word); - } - - @UsedForTesting - public boolean isInBigramListForTests(final String word) { - // TODO: Use native method to determine whether the word is in dictionary or not - return mBigramList.containsKey(word) || mBigramList.getBigrams(null).containsKey(word); - } -} diff --git a/java/src/com/android/inputmethod/latin/utils/AdditionalSubtypeUtils.java b/java/src/com/android/inputmethod/latin/utils/AdditionalSubtypeUtils.java index fdfabbd3e..ef1d0f42c 100644 --- a/java/src/com/android/inputmethod/latin/utils/AdditionalSubtypeUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/AdditionalSubtypeUtils.java @@ -17,6 +17,7 @@ package com.android.inputmethod.latin.utils; import static 
com.android.inputmethod.latin.Constants.Subtype.KEYBOARD_MODE; +import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.EMOJI_CAPABLE; import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.IS_ADDITIONAL_SUBTYPE; import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.KEYBOARD_LAYOUT_SET; import static com.android.inputmethod.latin.Constants.Subtype.ExtraValue.UNTRANSLATABLE_STRING_IN_SUBTYPE_NAME; @@ -27,10 +28,10 @@ import android.util.Log; import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.compat.InputMethodSubtypeCompatUtils; -import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.R; import java.util.ArrayList; +import java.util.Arrays; public final class AdditionalSubtypeUtils { private static final String TAG = AdditionalSubtypeUtils.class.getSimpleName(); @@ -146,31 +147,36 @@ public final class AdditionalSubtypeUtils { return sb.toString(); } - private static InputMethodSubtype buildInputMethodSubtype(int nameId, String localeString, - String layoutExtraValue, String additionalSubtypeExtraValue) { - // CAVEAT! If you want to change subtypeId after changing the extra values, - // you must change "getInputMethodSubtypeId". But it will remove the additional keyboard - // from the current users. So, you should be really careful to change it. - final int subtypeId = getInputMethodSubtypeId(nameId, localeString, layoutExtraValue, - additionalSubtypeExtraValue); + private static InputMethodSubtype buildInputMethodSubtype(final int nameId, + final String localeString, final String layoutExtraValue, + final String additionalSubtypeExtraValue) { + // To preserve additional subtype settings and user's selection across OS updates, subtype + // id shouldn't be changed. New attributes, such as emojiCapable, are carefully excluded + // from the calculation of subtype id. 
+ final String compatibleExtraValue = StringUtils.joinCommaSplittableText( + layoutExtraValue, additionalSubtypeExtraValue); + final int compatibleSubtypeId = getInputMethodSubtypeId(localeString, compatibleExtraValue); final String extraValue; - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1) { - extraValue = layoutExtraValue + "," + additionalSubtypeExtraValue - + "," + Constants.Subtype.ExtraValue.ASCII_CAPABLE - + "," + Constants.Subtype.ExtraValue.EMOJI_CAPABLE; + // Color Emoji is supported from KitKat. + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.KITKAT) { + extraValue = StringUtils.appendToCommaSplittableTextIfNotExists( + EMOJI_CAPABLE, compatibleExtraValue); } else { - extraValue = layoutExtraValue + "," + additionalSubtypeExtraValue; + extraValue = compatibleExtraValue; } return InputMethodSubtypeCompatUtils.newInputMethodSubtype(nameId, R.drawable.ic_ime_switcher_dark, localeString, KEYBOARD_MODE, extraValue, - false, false, subtypeId); + false, false, compatibleSubtypeId); } - private static int getInputMethodSubtypeId(int nameId, String localeString, - String layoutExtraValue, String additionalSubtypeExtraValue) { - // TODO: Use InputMethodSubtypeBuilder once we use SDK version 19. - return (new InputMethodSubtype(nameId, R.drawable.ic_ime_switcher_dark, - localeString, KEYBOARD_MODE, layoutExtraValue + "," + additionalSubtypeExtraValue, - false, false)).hashCode(); + private static int getInputMethodSubtypeId(final String localeString, final String extraValue) { + // From the compatibility point of view, the calculation of subtype id has been copied from + // {@link InputMethodSubtype} of JellyBean MR2. 
+ return Arrays.hashCode(new Object[] { + localeString, + KEYBOARD_MODE, + extraValue, + false /* isAuxiliary */, + false /* overrideImplicitlyEnabledSubtype */ }); } } diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java index a36548392..4cc89d0a7 100644 --- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java @@ -39,6 +39,8 @@ public final class StringUtils { public static final int CAPITALIZE_FIRST = 1; // First only public static final int CAPITALIZE_ALL = 2; // All caps + private static final String EMPTY_STRING = ""; + private StringUtils() { // This utility class is not publicly instantiable. } @@ -80,6 +82,20 @@ public final class StringUtils { return containsInArray(text, extraValues.split(SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT)); } + public static String joinCommaSplittableText(final String head, final String tail) { + if (TextUtils.isEmpty(head) && TextUtils.isEmpty(tail)) { + return EMPTY_STRING; + } + // Here either head or tail is not null. 
+ if (TextUtils.isEmpty(head)) { + return tail; + } + if (TextUtils.isEmpty(tail)) { + return head; + } + return head + SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT + tail; + } + public static String appendToCommaSplittableTextIfNotExists(final String text, final String extraValues) { if (TextUtils.isEmpty(extraValues)) { @@ -94,7 +110,7 @@ public final class StringUtils { public static String removeFromCommaSplittableTextIfExists(final String text, final String extraValues) { if (TextUtils.isEmpty(extraValues)) { - return ""; + return EMPTY_STRING; } final String[] elements = extraValues.split(SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT); if (!containsInArray(text, elements)) { @@ -380,7 +396,7 @@ public final class StringUtils { @UsedForTesting public static String byteArrayToHexString(byte[] bytes) { if (bytes == null || bytes.length == 0) { - return ""; + return EMPTY_STRING; } final StringBuilder sb = new StringBuilder(); for (byte b : bytes) { @@ -444,7 +460,7 @@ public final class StringUtils { public static String listToJsonStr(List<Object> list) { if (list == null || list.isEmpty()) { - return ""; + return EMPTY_STRING; } final StringWriter sw = new StringWriter(); final JsonWriter writer = new JsonWriter(sw); @@ -470,6 +486,6 @@ public final class StringUtils { } catch (IOException e) { } } - return ""; + return EMPTY_STRING; } } diff --git a/native/jni/Android.mk b/native/jni/Android.mk index a5757fd58..b61a66ce6 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -68,12 +68,14 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ - bigram/bigram_list_read_write_utils.cpp \ - bigram/dynamic_bigram_list_policy.cpp \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ structure/dictionary_structure_with_buffer_policy_factory.cpp) \ + $(addprefix suggest/policyimpl/dictionary/bigram/, \ + 
bigram_list_read_write_utils.cpp \ + dynamic_bigram_list_policy.cpp \ + ver4_bigram_list_policy.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp) \ @@ -88,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_patricia_trie_writing_helper.cpp \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ + content/bigram_dict_content.cpp \ ver4_dict_constants.cpp \ ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_node_writer.cpp \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 3becc7e39..c4383d754 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -86,10 +86,10 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy( + DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy = DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), - isUpdatable == JNI_TRUE)); + isUpdatable == JNI_TRUE); if (!dictionaryStructureWithBufferPolicy.get()) { return 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp new file mode 100644 index 000000000..94d7f1061 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2013 The Android Open 
Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" + +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext, + &targetTerminalId, bigramEntryPos); + if (outBigramPos) { + // Lookup target PtNode position. + *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); + } +} + +bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, + const int newProbability, bool *const outAddedNewEntry) { + if (outAddedNewEntry) { + *outAddedNewEntry = false; + } + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Updating PtNode doesn't have a bigram list. + // Create new bigram list. 
+ if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + // Write an entry. + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + false /* hasNext */, newTargetTerminalId, &writingPos)) { + return false; + } + return true; + } + + const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); + if (entryPosToUpdate != NOT_A_DICT_POS) { + // Overwrite existing entry. + int readingPos = entryPosToUpdate; + bool hasNext = false; + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) { + // Reuse invalid entry. + *outAddedNewEntry = true; + } + int writingPos = entryPosToUpdate; + return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext, + newTargetTerminalId, &writingPos); + } + + // Add new entry to the bigram list. + // Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + // Write new entry at a head position of the bigram list. + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + true /* hasNext */, newTargetTerminalId, &writingPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + // Append existing entries by copying. + return mBigramDictContent->copyBigramList(bigramListPos, writingPos); +} + +bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list doesn't exist.
+ return false; + } + const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); + if (entryPosToUpdate == NOT_A_DICT_POS) { + // Bigram entry doesn't exist. + return false; + } + int readingPos = entryPosToUpdate; + bool hasNext = false; + int probability = NOT_A_PROBABILITY; + int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &originalTargetTerminalId, &readingPos); + if (targetTerminalId != originalTargetTerminalId) { + // Bigram entry doesn't exist. + return false; + } + int writingPos = entryPosToUpdate; + // Remove bigram entry by overwriting target terminal Id. + return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); +} + +int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, + const int bigramListPos) const { + bool hasNext = true; + int invalidEntryPos = NOT_A_DICT_POS; + int readingPos = bigramListPos; + while(hasNext) { + const int entryPos = readingPos; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == targetTerminalIdToFind) { + // Entry with same target is found. + return entryPos; + } else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + // Invalid entry that can be reused is found. 
+ invalidEntryPos = entryPos; + } + } + return invalidEntryPos; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index 875a0ff9b..b3fe13d7d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -19,46 +19,37 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" namespace latinime { +class BigramDictContent; +class TerminalPositionLookupTable; + class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { public: - Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent, + Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, const TerminalPositionLookupTable *const terminalPositionLookupTable) : mBigramDictContent(bigramDictContent), mTerminalPositionLookupTable(terminalPositionLookupTable) {} void getNextBigram(int *const outBigramPos, int *const outProbability, - bool *const outHasNext, int *const bigramEntryPos) const { - int bigramFlags = 0; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId, - bigramEntryPos); - if (outProbability) { - *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); - } - if (outHasNext) { - *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); - } - if (outBigramPos) { - // Lookup target PtNode position. 
- *outBigramPos = - mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); - } - } + bool *const outHasNext, int *const bigramEntryPos) const; void skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. } + bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, + bool *const outAddedNewEntry); + + bool removeEntry(const int terminalId, const int targetTerminalId); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); - const BigramDictContent *const mBigramDictContent; + int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; + + BigramDictContent *const mBigramDictContent; const TerminalPositionLookupTable *const mTerminalPositionLookupTable; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 3ab6a8e21..063b84cbf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -35,8 +35,8 @@ namespace latinime { const int bufOffset, const int size, const bool isUpdatable) { // Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of // MmappedBufferWrapper if the instance has the responsibility. - MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size, - isUpdatable)); + MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size, + isUpdatable); if (!mmappedBuffer.get()) { return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); } @@ -58,8 +58,8 @@ namespace latinime { } // Removing extension to get the base path. 
dictDirPath.erase(pos); - const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( - Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer)); + const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = + Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer); if (!dictBuffers.get()->isValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements."); ASSERT(false); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index c3fe03d37..b3fdbeb78 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -238,6 +238,9 @@ int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int } // All characters are matched. if (length == getTotalCodePointCount(ptNodeParams)) { + if (!ptNodeParams.isTerminal()) { + return NOT_A_DICT_POS; + } // Terminal position is found. return ptNodeParams.getHeadPos(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp new file mode 100644 index 000000000..999460086 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" + +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability, + bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const { + const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); + const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); + if (outProbability) { + *outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; + } + if (outHasNext) { + *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; + } + const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + if (outTargetTerminalId) { + *outTargetTerminalId = + (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? 
+ Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId; + } +} + +bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, + const int targetTerminalId, int *const entryWritingPos) { + BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); + const int bigramFlags = createAndGetBigramFlags(probability, hasNext); + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { + return false; + } + const int targetTerminalIdToWrite = + (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? + Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; + return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); +} + +bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) { + bool hasNext = true; + int readingPos = bigramListPos; + int writingPos = toPos; + while(hasNext) { + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId, + &readingPos); + if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, + &writingPos)) { + return false; + } + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index 5eed13e70..bc9e4b619 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -33,21 +33,15 @@ class BigramDictContent : public SparseTableDictContent { Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} - void getBigramEntryAndAdvancePosition(int *const outBigramFlags, - int *const outTargetTerminalId, int *const bigramEntryPos) const { - const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); - if (outBigramFlags) { - *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); - } - if (outTargetTerminalId) { - *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); - } - } + BigramDictContent() + : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + + void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, + int *const outTargetTerminalId, int *const bigramEntryPos) const; - // Returns head position of bigram list for a PtNode specified by terminalId. - int getBigramListHeadPos(const int terminalId) const { + // Returns head position of bigram list for a PtNode specified by terminalId. 
+ int getBigramListHeadPos(const int terminalId) const { const SparseTable *const addressLookupTable = getAddressLookupTable(); if (!addressLookupTable->contains(terminalId)) { return NOT_A_DICT_POS; @@ -55,8 +49,23 @@ class BigramDictContent : public SparseTableDictContent { return addressLookupTable->get(terminalId); } + bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, + const int targetTerminalId, int *const entryWritingPos); + + bool createNewBigramList(const int terminalId) { + const int bigramListPos = getContentBuffer()->getTailPosition(); + return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); + } + + bool copyBigramList(const int bigramListPos, const int toPos); + private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent); + DISALLOW_COPY_AND_ASSIGN(BigramDictContent); + + int createAndGetBigramFlags(const int probability, const bool hasNext) const { + return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) + | (hasNext ? 
Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0); + } }; } // namespace latinime #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h index e85bbe18e..c109cbf51 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h @@ -31,6 +31,8 @@ class ProbabilityDictContent : public SingleDictContent { : SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable) {} + ProbabilityDictContent() {} + int getProbability(const int terminalId) const { if (terminalId < 0 || terminalId >= getSize()) { return NOT_A_PROBABILITY; @@ -61,7 +63,7 @@ class ProbabilityDictContent : public SingleDictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent); + DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); int getSize() const { return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h index c10fbcb2a..8463a1753 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h @@ -33,6 +33,10 @@ class ShortcutDictContent : public SparseTableDictContent { Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} + ShortcutDictContent() + : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} + void getShortcutEntryAndAdvancePosition(const int 
maxCodePointCount, int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags, int *const shortcutEntryPos) const { @@ -57,7 +61,7 @@ class ShortcutDictContent : public SparseTableDictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent); + DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent); }; } // namespace latinime #endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h index 4cb96da6a..7669c1eca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -31,12 +32,17 @@ class SingleDictContent : public DictContent { : mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)), mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0, mMmappedBuffer.get() ? 
mMmappedBuffer.get()->getBufferSize() : 0, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mIsValid(mMmappedBuffer.get() != 0) {} + + SingleDictContent() + : mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mIsValid(true) {} virtual ~SingleDictContent() {} virtual bool isValid() const { - return mMmappedBuffer.get() != 0; + return mIsValid; } protected: @@ -49,10 +55,11 @@ class SingleDictContent : public DictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent); + DISALLOW_COPY_AND_ASSIGN(SingleDictContent); const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; BufferWithExtendableBuffer mExpandableContentBuffer; + const bool mIsValid; }; } // namespace latinime #endif /* LATINIME_SINGLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 71868e9ca..5ae5f0ff1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/sparse_table.h" @@ -49,20 +50,37 @@ class SparseTableDictContent : public DictContent { mContentBuffer.get() ? 
mContentBuffer.get()->getBufferSize() : 0, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, - sparseTableBlockSize, sparseTableDataSize) {} + sparseTableBlockSize, sparseTableDataSize), + mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 + && mContentBuffer.get() != 0) {} + + SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize) + : mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0), + mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, + sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {} virtual ~SparseTableDictContent() {} virtual bool isValid() const { - return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 - && mContentBuffer.get() != 0; + return mIsValid; } protected: + SparseTable *getUpdatableAddressLookupTable() { + return &mAddressLookupTable; + } + const SparseTable *getAddressLookupTable() const { return &mAddressLookupTable; } + BufferWithExtendableBuffer *getWritableContentBuffer() { + return &mExpandableContentBuffer; + } + const BufferWithExtendableBuffer *getContentBuffer() const { return &mExpandableContentBuffer; } @@ -70,7 +88,6 @@ class SparseTableDictContent : public DictContent { private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); - // TODO: Have sparse table. 
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer; const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer; const MmappedBuffer::MmappedBufferPtr mContentBuffer; @@ -78,6 +95,7 @@ class SparseTableDictContent : public DictContent { BufferWithExtendableBuffer mExpandableAddressTableBuffer; BufferWithExtendableBuffer mExpandableContentBuffer; SparseTable mAddressLookupTable; + const bool mIsValid; }; } // namespace latinime #endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index f6ced31b4..e016a2b5f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -38,6 +38,8 @@ class TerminalPositionLookupTable : public SingleDictContent { / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE), mHeaderRegionSize(headerRegionSize) {} + TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {} + int getTerminalPtNodePosition(const int terminalId) const { if (terminalId < 0 || terminalId >= mSize) { return NOT_A_DICT_POS; @@ -66,7 +68,7 @@ class TerminalPositionLookupTable : public SingleDictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); + DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable); int mSize; const int mHeaderRegionSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 6476478e5..e468be591 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -33,27 +33,30 @@ class Ver4DictBuffers { public: 
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr; - static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, + static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, const MmappedBuffer::MmappedBufferPtr &dictBuffer) { const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false; return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable)); } + static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers() { + return Ver4DictBuffersPtr(new Ver4DictBuffers()); + } + AK_FORCE_INLINE bool isValid() const { return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid() && mShortcutDictContent.isValid(); } - AK_FORCE_INLINE uint8_t *getRawDictBuffer() const { - return mDictBuffer.get()->getBuffer(); + AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() { + return &mExpandableHeaderBuffer; } - AK_FORCE_INLINE int getRawDictBufferSize() const { - return mDictBuffer.get()->getBufferSize(); + AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() { + return &mExpandableTrieBuffer; } - AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() { return &mTerminalPositionLookupTable; } @@ -70,6 +73,10 @@ class Ver4DictBuffers { return &mProbabilityDictContent; } + AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() { + return &mBigramDictContent; + } + AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { return &mBigramDictContent; } @@ -82,21 +89,41 @@ class Ver4DictBuffers { return mIsUpdatable; } + bool flush(const char *const dictDirPath) { + // TODO: Implement. 
+ return false; + } + private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); + DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath, const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable) : mDictBuffer(dictBuffer), - // TODO: Quit using getHeaderSize. - mTerminalPositionLookupTable(dictDirPath, isUpdatable, - HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), + mHeaderSize(HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), + mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderSize, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderSize, + dictBuffer.get()->getBufferSize() - mHeaderSize, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + // TODO: Quit using header size. + mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize), mProbabilityDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, isUpdatable), mIsUpdatable(isUpdatable) {} + AK_FORCE_INLINE Ver4DictBuffers() + : mDictBuffer(0), mHeaderSize(0), + mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mTerminalPositionLookupTable(), mProbabilityDictContent(), + mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {} + const MmappedBuffer::MmappedBufferPtr mDictBuffer; + const int mHeaderSize; + BufferWithExtendableBuffer mExpandableHeaderBuffer; + BufferWithExtendableBuffer mExpandableTrieBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; ProbabilityDictContent mProbabilityDictContent; BigramDictContent mBigramDictContent; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 
941bcd594..af13a374a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -30,6 +30,10 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = ".shortcut_index_shortcut"; +// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets. +// TODO: Make MAX_DICTIONARY_SIZE 8MB. +const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; + const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; @@ -42,7 +46,13 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; +// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing +// invalid terminal ID in bigram lists. 
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID = + (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; +const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; +const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 7270d9e6e..cfb7740be 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -34,6 +34,8 @@ class Ver4DictConstants { static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; + static const int MAX_DICTIONARY_SIZE; + static const int NOT_A_TERMINAL_ID; static const int PROBABILITY_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE; @@ -47,6 +49,9 @@ class Ver4DictConstants { static const int BIGRAM_FLAGS_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; + static const int BIGRAM_PROBABILITY_MASK; + static const int BIGRAM_HAS_NEXT_MASK; static const int SHORTCUT_FLAGS_FIELD_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 8b0ea823e..b572ee87f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" -#include 
"suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" @@ -167,8 +167,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { return false; } - // TODO: Implement bigram and shortcut writing. - // Create node flags and write them. PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), @@ -188,14 +186,14 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, bool *const outAddedNewBigram) { - // TODO: Implement. - return false; + return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram); } bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { - // TODO: Implement. 
- return false; + return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 520ffc080..698483a79 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -28,14 +28,14 @@ namespace latinime { const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = - DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; + Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); while (!readingHelper.isEnd()) { const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); @@ -63,7 +63,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodePos(ptNodePos); return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( 
maxCodePointCount, outCodePoints, outUnigramProbability); @@ -71,7 +71,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } @@ -135,12 +135,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } - if (mDictBuffer.getTailPosition() - >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { - AKLOGE("The dictionary is too large to dynamically update."); + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); return false; } - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, @@ -156,14 +156,63 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, const int *const word1, const int length1, const int probability) { - // TODO: Implement. 
- return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + bool addedNewBigram = false; + if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } } bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - // TODO: Implement. - return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. 
Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } } void Ver4PatriciaTriePolicy::flush(const char *const filePath) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index fdb7ac69b..e8fdf5513 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -38,18 +38,17 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) : mBuffers(buffers), - mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4), - mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), - mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mBigramPolicy(mBuffers.get()->getBigramDictContent(), + mHeaderPolicy(mBuffers.get()->getWritableHeaderBuffer()->getBuffer( + false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4), + mDictBuffer(mBuffers.get()->getWritableTrieBuffer()), + mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), - mNodeReader(&mDictBuffer, 
mBuffers.get()->getProbabilityDictContent()), - mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, + mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), + mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, &mShortcutPolicy), - mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter, + mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter, mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()) {}; @@ -115,7 +114,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; - BufferWithExtendableBuffer mDictBuffer; + BufferWithExtendableBuffer *const mDictBuffer; Ver4BigramListPolicy mBigramPolicy; Ver4ShortcutListPolicy mShortcutPolicy; Ver4PatriciaTrieNodeReader mNodeReader; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index f17a0d1c0..26eafcd44 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -49,6 +49,11 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC } } +bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) { + int writingPos = pos; + return writeUintAndAdvancePosition(data, size, &writingPos); +} + bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { if (!(size >= 1 && size <= 4)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 13dce9b61..ee6107ad7 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -93,6 +93,8 @@ class BufferWithExtendableBuffer { * Writing is allowed for original buffer, already written region of additional buffer and the * tail of additional buffer. */ + bool writeUint(const uint32_t data, const int size, const int pos); + bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos); bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index b48e5b005..40f7d1f5c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -21,6 +21,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" @@ -34,7 +35,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = case 3: return createEmptyV3DictFile(filePath, attributeMap); case 4: - // TODO: Support version 4 dictionary format. + return createEmptyV4DictFile(filePath, attributeMap); return false; default: // Only version 3 dictionary is supported for now. 
@@ -58,6 +59,20 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); } +/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers(); + HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap); + headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(), + true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */, + 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( + dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) { + return false; + } + return dictBuffers.get()->flush(filePath); +} + /* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { const int tmpFileNameBufSize = strlen(filePath) @@ -69,21 +84,21 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); FILE *const file = fopen(tmpFileName, "wb"); if (!file) { - AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + AKLOGE("Dictionary file %s cannot be opened.", tmpFileName); ASSERT(false); return false; } // Write the dictionary header. if (!writeBufferToFile(file, dictHeader)) { remove(tmpFileName); - AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); + AKLOGE("Dictionary header cannot be written. size: %d", dictHeader->getTailPosition()); ASSERT(false); return false; } // Write the dictionary body. if (!writeBufferToFile(file, dictBody)) { remove(tmpFileName); - AKLOGE("Dictionary body cannnot be written. 
size: %d", dictBody->getTailPosition()); + AKLOGE("Dictionary body cannot be written. size: %d", dictBody->getTailPosition()); ASSERT(false); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h index bd4ac66fd..3291f98c7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -43,6 +43,9 @@ class DictFileWritingUtils { static bool createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static bool createEmptyV4DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static bool writeBufferToFile(FILE *const file, const BufferWithExtendableBuffer *const buffer); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp index 2678b8c7b..9be35620c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp @@ -19,23 +19,68 @@ namespace latinime { const int SparseTable::NOT_EXIST = -1; +const int SparseTable::INDEX_SIZE = 4; bool SparseTable::contains(const int id) const { - const int readingPos = id / mBlockSize * mDataSize; + const int readingPos = getPosInIndexTable(id); if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) { return false; } - const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos); return index != NOT_EXIST; } uint32_t SparseTable::get(const int id) const { - const int indexTableIndex = id / mBlockSize; - int readingPos = indexTableIndex * mDataSize; - const int index = mIndexTableBuffer->readUint(mDataSize, 
readingPos); + const int indexTableReadingPos = getPosInIndexTable(id); + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos); + const int contentTableReadingPos = getPosInContentTable(id, index); + return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos); +} + +bool SparseTable::set(const int id, const uint32_t value) { + const int posInIndexTable = getPosInIndexTable(id); + // Extends the index table if needed. + if (mIndexTableBuffer->getTailPosition() < posInIndexTable) { + int tailPos = mIndexTableBuffer->getTailPosition(); + while(tailPos < posInIndexTable) { + if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) { + return false; + } + } + } + if (contains(id)) { + // The entry is already in the content table. + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable); + return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); + } + // The entry is not in the content table. + // Create new entry in the content table. + const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition()); + if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) { + return false; + } + // Write a new block that containing the entry to be set. 
+ int writingPos = getPosInContentTable(0 /* id */, index); + for (int i = 0; i < mBlockSize; ++i) { + if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize, + &writingPos)) { + return false; + } + } + return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); +} + +int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const { + return contentTablePos / mDataSize / mBlockSize; +} + +int SparseTable::getPosInIndexTable(const int id) const { + return (id / mBlockSize) * INDEX_SIZE; +} + +int SparseTable::getPosInContentTable(const int id, const int index) const { const int offset = id % mBlockSize; - readingPos = (index * mDataSize + offset) * mBlockSize; - return mContentTableBuffer->readUint(mDataSize, readingPos); + return (index * mDataSize + offset) * mBlockSize; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h index d71756c63..21c167506 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h @@ -38,10 +38,19 @@ class SparseTable { uint32_t get(const int id) const; + bool set(const int id, const uint32_t value); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable); + int getIndexFromContentTablePos(const int contentTablePos) const; + + int getPosInIndexTable(const int id) const; + + int getPosInContentTable(const int id, const int index) const; + static const int NOT_EXIST; + static const int INDEX_SIZE; BufferWithExtendableBuffer *const mIndexTableBuffer; BufferWithExtendableBuffer *const mContentTableBuffer; diff --git a/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java b/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java deleted file mode 100644 index 6aae1044e..000000000 --- 
a/tests/src/com/android/inputmethod/latin/ExpandableDictionaryTests.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin; - -import android.test.AndroidTestCase; -import android.test.suitebuilder.annotation.SmallTest; - -/** - * Unit test for ExpandableDictionary - */ -@SmallTest -public class ExpandableDictionaryTests extends AndroidTestCase { - - private final static int UNIGRAM_FREQ = 50; - // See UserBinaryDictionary for more information about this variable. - // For tests, its actual value does not matter. 
- private final static int SHORTCUT_FREQ = 14; - - public void testAddWordAndGetWordFrequency() { - final ExpandableDictionary dict = new ExpandableDictionary(Dictionary.TYPE_USER); - - // Add words - dict.addWord("abcde", "abcde", UNIGRAM_FREQ, SHORTCUT_FREQ); - dict.addWord("abcef", null, UNIGRAM_FREQ + 1, 0); - - // Check words - assertFalse(dict.isValidWord("abcde")); - assertEquals(UNIGRAM_FREQ, dict.getWordFrequency("abcde")); - assertTrue(dict.isValidWord("abcef")); - assertEquals(UNIGRAM_FREQ+1, dict.getWordFrequency("abcef")); - - dict.addWord("abc", null, UNIGRAM_FREQ + 2, 0); - assertTrue(dict.isValidWord("abc")); - assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc")); - - // Add existing word with lower frequency - dict.addWord("abc", null, UNIGRAM_FREQ, 0); - assertEquals(UNIGRAM_FREQ + 2, dict.getWordFrequency("abc")); - - // Add existing word with higher frequency - dict.addWord("abc", null, UNIGRAM_FREQ + 3, 0); - assertEquals(UNIGRAM_FREQ + 3, dict.getWordFrequency("abc")); - } -} diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index 85e6243e4..ad57e4c9f 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -206,4 +206,96 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("y")); } + public void testWriteBigrams() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch 
(UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertTrue(binaryDictionary.isValidDictionary()); + + final int unigramProbability = 100; + final int bigramProbability = 10; + final int updatedBigramProbability = 15; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + + final int probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + } + + public void testRemoveBigramWords() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + final DictEncoder encoder = new 
Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertTrue(binaryDictionary.isValidDictionary()); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + + binaryDictionary.removeBigramWords("aaa", "abb"); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + + binaryDictionary.removeBigramWords("aaa", "bcc"); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); + binaryDictionary.removeBigramWords("abb", "aaa"); + assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); + binaryDictionary.removeBigramWords("abb", "bcc"); + assertEquals(false, 
binaryDictionary.isValidBigram("abb", "bcc")); + + binaryDictionary.removeBigramWords("aaa", "abb"); + // Test remove non-existing bigram operation. + binaryDictionary.removeBigramWords("aaa", "abb"); + binaryDictionary.removeBigramWords("bcc", "aaa"); + } + } diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java index 7c1decb71..c3e062b65 100644 --- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java @@ -214,9 +214,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { public void testAddManyWords() { final String testFilenameSuffix = "testRandomWords" + System.currentTimeMillis(); - final int numberOfWords = - ExpandableBinaryDictionary.ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ? - 10000 : 1000; + final int numberOfWords = 10000; final Random random = new Random(123456); clearHistory(testFilenameSuffix); try { diff --git a/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java index 4e396a1cf..21fcf1117 100644 --- a/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java @@ -44,7 +44,7 @@ public class StringUtilsTests extends AndroidTestCase { })); } - public void testContainsInExtraValues() { + public void testContainsInCommaSplittableText() { assertFalse("null", StringUtils.containsInCommaSplittableText("key", null)); assertFalse("empty", StringUtils.containsInCommaSplittableText("key", "")); assertFalse("not in 1 element", @@ -56,7 +56,28 @@ public class StringUtilsTests extends AndroidTestCase { assertTrue("in 2 elements", StringUtils.containsInCommaSplittableText("key", "key1,key")); } - public void 
testAppendToExtraValuesIfNotExists() { + public void testJoinCommaSplittableText() { + assertEquals("2 nulls", "", + StringUtils.joinCommaSplittableText(null, null)); + assertEquals("null and empty", "", + StringUtils.joinCommaSplittableText(null, "")); + assertEquals("empty and null", "", + StringUtils.joinCommaSplittableText("", null)); + assertEquals("2 empties", "", + StringUtils.joinCommaSplittableText("", "")); + assertEquals("text and null", "text", + StringUtils.joinCommaSplittableText("text", null)); + assertEquals("text and empty", "text", + StringUtils.joinCommaSplittableText("text", "")); + assertEquals("null and text", "text", + StringUtils.joinCommaSplittableText(null, "text")); + assertEquals("empty and text", "text", + StringUtils.joinCommaSplittableText("", "text")); + assertEquals("2 texts", "text1,text2", + StringUtils.joinCommaSplittableText("text1", "text2")); + } + + public void testAppendToCommaSplittableTextIfNotExists() { assertEquals("null", "key", StringUtils.appendToCommaSplittableTextIfNotExists("key", null)); assertEquals("empty", "key", @@ -77,7 +98,7 @@ public class StringUtilsTests extends AndroidTestCase { StringUtils.appendToCommaSplittableTextIfNotExists("key", "key1,key,key3")); } - public void testRemoveFromExtraValuesIfExists() { + public void testRemoveFromCommaSplittableTextIfExists() { assertEquals("null", "", StringUtils.removeFromCommaSplittableTextIfExists("key", null)); assertEquals("empty", "", StringUtils.removeFromCommaSplittableTextIfExists("key", "")); diff --git a/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml index 4cf742441..66393732c 100644 --- a/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-ca/donottranslate-more-keys.xml @@ -71,7 +71,7 @@ U+0142: "ł" LATIN SMALL LETTER L WITH STROKE --> <string name="more_keys_for_l">l·l,ł</string> <!-- U+00B7: "·" MIDDLE 
DOT --> - <string name="more_keys_for_punctuation">"!fixedColumnOrder!4,·,!,\\,,\?,:,;,\@"</string> + <string name="more_keys_for_punctuation">"!fixedColumnOrder!9,;,/,(,),#,·,!,\\,,\?,&,\\%,+,\",-,:,',\@"</string> <string name="more_keys_for_period">\?,·</string> <!-- U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA --> <string name="keylabel_for_spanish_row2_10">ç</string> diff --git a/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml index a1633316f..994e35ae9 100644 --- a/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values-iw/donottranslate-more-keys.xml @@ -28,6 +28,7 @@ <!-- U+00B1: "±" PLUS-MINUS SIGN U+FB29: "﬩" HEBREW LETTER ALTERNATIVE PLUS SIGN --> <string name="more_keys_for_plus">±,﬩</string> + <string name="more_keys_for_punctuation">"!fixedColumnOrder!8,;,/,(|),)|(,#,!,\\,,\?,&,\\%,+,\",-,:,',\@"</string> <!-- The all letters need to be mirrored are found at http://www.unicode.org/Public/6.1.0/ucd/BidiMirroring.txt --> <string name="more_keys_for_left_parenthesis">!fixedColumnOrder!3,<|>,{|},[|]</string> diff --git a/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml b/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml index 008ef3007..5f687db99 100644 --- a/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml +++ b/tools/make-keyboard-text/res/values/donottranslate-more-keys.xml @@ -83,7 +83,7 @@ <string name="more_keys_for_currency_dollar">¢,£,€,¥,₱</string> <string name="keylabel_for_currency">$</string> <string name="more_keys_for_currency">$,¢,€,£,¥,₱</string> - <string name="more_keys_for_punctuation">"!fixedColumnOrder!4,#,!,\\,,\?,-,:,',\@"</string> + <string name="more_keys_for_punctuation">"!fixedColumnOrder!8,;,/,(,),#,!,\\,,\?,&,\\%,+,\",-,:,',\@"</string> <!-- U+2020: "†" DAGGER U+2021: "‡" DOUBLE DAGGER U+2605: "★" BLACK STAR --> |