diff options
18 files changed, 201 insertions, 91 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 834d3ed53..dacb8483c 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -40,6 +40,9 @@ public final class BinaryDictionary extends Dictionary { private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; // Must be equal to MAX_RESULTS in native/jni/src/defines.h private static final int MAX_RESULTS = 18; + // Required space count for auto commit. + // TODO: Remove this heuristic. + private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3; private long mNativeDict; private final Locale mLocale; @@ -49,6 +52,7 @@ public final class BinaryDictionary extends Dictionary { private final int[] mSpaceIndices = new int[MAX_RESULTS]; private final int[] mOutputScores = new int[MAX_RESULTS]; private final int[] mOutputTypes = new int[MAX_RESULTS]; + private final int[] mOutputAutoCommitFirstWordConfidence = new int[1]; // Only one result private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions(); @@ -104,7 +108,8 @@ public final class BinaryDictionary extends Dictionary { long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint, int[] suggestOptions, int[] prevWordCodePointArray, - int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes); + int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes, + int[] outputAutoCommitFirstWordConfidence); private static native float calcNormalizedScoreNative(int[] before, int[] after, int score); private static native int editDistanceNative(int[] before, int[] after); private static native void addUnigramWordNative(long dict, int[] word, int probability); @@ -157,7 +162,7 @@ public final class BinaryDictionary extends Dictionary { ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints, inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices, - mOutputTypes); + mOutputTypes, mOutputAutoCommitFirstWordConfidence); final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); for (int j = 0; j < count; ++j) { final int start = j * MAX_WORD_LENGTH; @@ -181,7 +186,8 @@ public final class BinaryDictionary extends Dictionary { // flags too and pass mOutputTypes[j] instead of kind suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len), score, kind, this /* sourceDict */, - mSpaceIndices[0] /* indexOfTouchPointOfSecondWord */)); + mSpaceIndices[0] /* indexOfTouchPointOfSecondWord */, + mOutputAutoCommitFirstWordConfidence[0])); } } return suggestions; @@ -256,6 +262,22 @@ public final class BinaryDictionary extends Dictionary { } @Override + public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { + // TODO: actually use the confidence rather than use this completely broken heuristic + final String word = candidate.mWord; + final int length = word.length(); + int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT; + for (int i = 0; i < length; ++i) { + // This is okay because no low-surrogate and no high-surrogate can ever match the + // space character, so we don't need to take care of iterating on code points. + if (Constants.CODE_SPACE == word.charAt(i)) { + if (0 >= --remainingSpaces) return true; + } + } + return false; + } + + @Override public void close() { synchronized (mDicTraverseSessions) { final int sessionsSize = mDicTraverseSessions.size(); diff --git a/java/src/com/android/inputmethod/latin/Dictionary.java b/java/src/com/android/inputmethod/latin/Dictionary.java index 8a3a88438..fa79f5af7 100644 --- a/java/src/com/android/inputmethod/latin/Dictionary.java +++ b/java/src/com/android/inputmethod/latin/Dictionary.java @@ -137,7 +137,10 @@ public abstract class Dictionary { } /** - * Whether we think this suggestion should trigger an auto-commit. + * Whether we think this suggestion should trigger an auto-commit. prevWord is the word + * before the suggestion, so that we can use n-gram frequencies. + * @param candidate The candidate suggestion, in whole (not only the first part). + * @return whether we should auto-commit or not. */ public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { // If we don't have support for auto-commit, or if we don't know, we return false to diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index ba7d1a2b0..d491f988a 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -344,7 +344,8 @@ public class ExpandableDictionary extends Dictionary { // in the future. suggestions.add(new SuggestedWordInfo(new String(word, 0, depth + 1), finalFreq, SuggestedWordInfo.KIND_CORRECTION, this /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); if (suggestions.size() >= Suggest.MAX_SUGGESTIONS) return false; } if (null != node.mShortcutTargets) { @@ -353,7 +354,8 @@ public class ExpandableDictionary extends Dictionary { final char[] shortcut = node.mShortcutTargets.get(shortcutIndex); suggestions.add(new SuggestedWordInfo(new String(shortcut, 0, shortcut.length), finalFreq, SuggestedWordInfo.KIND_SHORTCUT, this /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); if (suggestions.size() > Suggest.MAX_SUGGESTIONS) return false; } } @@ -604,7 +606,8 @@ public class ExpandableDictionary extends Dictionary { suggestions.add(new SuggestedWordInfo(new String(mLookedUpString, index, Constants.DICTIONARY_MAX_WORD_LENGTH - index), freq, SuggestedWordInfo.KIND_CORRECTION, this /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); } } } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 5657ed779..d8a47a307 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -2702,7 +2702,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen suggestions.add(new SuggestedWordInfo(s, SuggestionStripView.MAX_SUGGESTIONS - i, SuggestedWordInfo.KIND_RESUMED, Dictionary.DICTIONARY_RESUMED, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE + /* autoCommitFirstWordConfidence */)); } } } diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 9370757ec..7815f4d41 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -327,7 +327,8 @@ public final class Suggest { suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, Dictionary.DICTIONARY_USER_TYPED, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); } SuggestedWordInfo.removeDups(suggestionsContainer); @@ -474,7 +475,8 @@ public final class Suggest { sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); } return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind, - wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord); + wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */); } public void close() { diff --git a/java/src/com/android/inputmethod/latin/SuggestedWords.java b/java/src/com/android/inputmethod/latin/SuggestedWords.java index b27fd81e9..17637054a 100644 --- a/java/src/com/android/inputmethod/latin/SuggestedWords.java +++ b/java/src/com/android/inputmethod/latin/SuggestedWords.java @@ -114,7 +114,8 @@ public final class SuggestedWords { final SuggestedWordInfo suggestedWordInfo = new SuggestedWordInfo(text.toString(), SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_APP_DEFINED, Dictionary.DICTIONARY_APPLICATION_DEFINED, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */); result.add(suggestedWordInfo); } return result; @@ -128,7 +129,8 @@ public final class SuggestedWords { final HashSet<String> alreadySeen = CollectionUtils.newHashSet(); suggestionsList.add(new SuggestedWordInfo(typedWord, SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, Dictionary.DICTIONARY_USER_TYPED, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); alreadySeen.add(typedWord.toString()); final int previousSize = previousSuggestions.size(); for (int index = 1; index < previousSize; index++) { @@ -151,6 +153,7 @@ public final class SuggestedWords { public static final class SuggestedWordInfo { public static final int NOT_AN_INDEX = -1; + public static final int NOT_A_CONFIDENCE = -1; public static final int MAX_SCORE = Integer.MAX_VALUE; public static final int KIND_MASK_KIND = 0xFF; // Mask to get only the kind public static final int KIND_TYPED = 0; // What user typed @@ -180,16 +183,30 @@ public final class SuggestedWords { // passed to native code to get suggestions for a gesture that corresponds to the first // letter of the second word. public final int mIndexOfTouchPointOfSecondWord; + // For auto-commit. This is a measure of how confident we are that we can commit the + // first word of this suggestion. + public final int mAutoCommitFirstWordConfidence; private String mDebugString = ""; + /** + * Create a new suggested word info. + * @param word The string to suggest. + * @param score A measure of how likely this suggestion is. + * @param kind The kind of suggestion, as one of the above KIND_* constants. + * @param sourceDict What instance of Dictionary produced this suggestion. + * @param indexOfTouchPointOfSecondWord See mIndexOfTouchPointOfSecondWord. + * @param autoCommitFirstWordConfidence See mAutoCommitFirstWordConfidence. + */ public SuggestedWordInfo(final String word, final int score, final int kind, - final Dictionary sourceDict, final int indexOfTouchPointOfSecondWord) { + final Dictionary sourceDict, final int indexOfTouchPointOfSecondWord, + final int autoCommitFirstWordConfidence) { mWord = word; mScore = score; mKind = kind; mSourceDict = sourceDict; mCodePointCount = StringUtils.codePointCount(mWord); mIndexOfTouchPointOfSecondWord = indexOfTouchPointOfSecondWord; + mAutoCommitFirstWordConfidence = autoCommitFirstWordConfidence; } public boolean isEligibleForAutoCommit() { diff --git a/java/src/com/android/inputmethod/latin/settings/SettingsValues.java b/java/src/com/android/inputmethod/latin/settings/SettingsValues.java index 072bb8731..fc2d19298 100644 --- a/java/src/com/android/inputmethod/latin/settings/SettingsValues.java +++ b/java/src/com/android/inputmethod/latin/settings/SettingsValues.java @@ -295,7 +295,8 @@ public final class SettingsValues { puncList.add(new SuggestedWordInfo(KeySpecParser.getLabel(puncSpec), SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_HARDCODED, Dictionary.DICTIONARY_HARDCODED, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); } } return new SuggestedWords(puncList, diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 8da1859c4..6a366121d 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -70,7 +70,8 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jintArray suggestOptions, jintArray prevWordCodePointsForBigrams, jintArray outputCodePointsArray, - jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray) { + jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray, + jintArray outputAutoCommitFirstWordConfidence) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return 0; ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo); @@ -253,7 +254,7 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("getSuggestionsNative"), - const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"), + const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I[I)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) }, { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 936dc9c5d..4ee138125 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -18,6 +18,42 @@ namespace latinime { +const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000; + +void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + const BigramListReadWriteUtils::BigramFlags flags = + BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + buffer, flags, pos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); + *outHasNext = BigramListReadWriteUtils::hasNext(flags); + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } +} + +void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } +} + bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); if (usesAdditionalBuffer) { @@ -28,15 +64,16 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo // The buffer address can be changed after calling buffer writing methods. const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos); - int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, fromPos); - if (bigramPos == NOT_A_VALID_WORD_POS) { + if (originalBigramPos == NOT_A_VALID_WORD_POS) { // skip invalid bigram entry. continue; } if (usesAdditionalBuffer) { - bigramPos += mBuffer->getOriginalBufferSize(); + originalBigramPos += mBuffer->getOriginalBufferSize(); } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); BigramListReadWriteUtils::BigramFlags newBigramFlags; uint32_t newBigramOffset; int newBigramOffsetFieldSize; @@ -133,11 +170,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta if (usesAdditionalBuffer) { bigramOffsetFieldPos += mBuffer->getOriginalBufferSize(); } - int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, &pos); - if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) { - bigramPos += mBuffer->getOriginalBufferSize(); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); if (bigramPos != targetBigramPos) { continue; } @@ -152,4 +190,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta return false; } +int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( + const int originalBigramPos) const { + if (originalBigramPos == NOT_A_VALID_WORD_POS) { + return NOT_A_VALID_WORD_POS; + } + int currentPos = originalBigramPos; + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); + nodeReader.fetchNodeInfoFromBuffer(currentPos); + int bigramLinkCount = 0; + while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { + currentPos = nodeReader.getBigramLinkedNodePos(); + nodeReader.fetchNodeInfoFromBuffer(currentPos); + bigramLinkCount++; + if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) { + AKLOGI("Bigram link is invalid. start position: %d", bigramPos); + ASSERT(false); + return NOT_A_VALID_WORD_POS; + } + } + return currentPos; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index b7c05376d..e451e313d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,7 +21,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -31,43 +33,16 @@ namespace latinime { */ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { public: - DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer) - : mBuffer(buffer) {} + DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy) + : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {} ~DynamicBigramListPolicy() {} void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, - int *const pos) const { - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); - const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - *pos -= mBuffer->getOriginalBufferSize(); - } - const BigramListReadWriteUtils::BigramFlags flags = - BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); - *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( - buffer, flags, pos); - if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) { - *outBigramPos += mBuffer->getOriginalBufferSize(); - } - *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); - *outHasNext = BigramListReadWriteUtils::hasNext(flags); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); - } - } + int *const pos) const; - void skipAllBigrams(int *const pos) const { - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); - const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - *pos -= mBuffer->getOriginalBufferSize(); - } - BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); - } - } + void skipAllBigrams(int *const pos) const; // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these // positions after bigram lists. This method skips invalid bigram entries. @@ -81,7 +56,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); + static const int BIGRAM_LINK_COUNT_LIMIT; + BufferWithExtendableBuffer *const mBuffer; + const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; + + // Follow bigram link and return the position of bigram target PtNode that is currently valid. + int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 14682e3ce..56ef60ae4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -62,6 +62,11 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mBuffer->getOriginalBufferSize(); } + if (mSiblingPos == NOT_A_VALID_WORD_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + mBigramLinkedNodePos = mChildrenPos; + } else { + mBigramLinkedNodePos = NOT_A_DICT_POS; + } if (usesAdditionalBuffer) { pos += mBuffer->getOriginalBufferSize(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 30d251f3e..89d38a590 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -39,11 +39,11 @@ class DynamicPatriciaTrieNodeReader { const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), - mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), - mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), - mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), - mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_VALID_WORD_POS), mFlags(0), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), + mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} ~DynamicPatriciaTrieNodeReader() {} @@ -56,13 +56,9 @@ class DynamicPatriciaTrieNodeReader { AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - mNodePos = nodePos; mSiblingPos = NOT_A_VALID_WORD_POS; - fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints); - } - - AK_FORCE_INLINE int getNodePos() const { - return mNodePos; + mBigramLinkedNodePos = NOT_A_DICT_POS; + fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints); } // HeadPos is different from NodePos when the current PtNode is a moved PtNode. @@ -119,6 +115,11 @@ class DynamicPatriciaTrieNodeReader { return mChildrenPos; } + // Bigram linked node position. + AK_FORCE_INLINE int getBigramLinkedNodePos() const { + return mBigramLinkedNodePos; + } + // Shortcutlist position AK_FORCE_INLINE int getShortcutPos() const { return mShortcutPos; @@ -140,7 +141,6 @@ class DynamicPatriciaTrieNodeReader { const BufferWithExtendableBuffer *const mBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - int mNodePos; int mHeadPos; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; int mParentPos; @@ -149,6 +149,7 @@ class DynamicPatriciaTrieNodeReader { int mProbability; int mChildrenPosFieldPos; int mChildrenPos; + int mBigramLinkedNodePos; int mShortcutPos; int mBigramPos; int mSiblingPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 945677b50..ece1781f1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { - childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(), + childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), nodeReader->getChildrenPos(), nodeReader->getProbability(), nodeReader->isTerminal() && !nodeReader->isDeleted(), nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), @@ -122,7 +122,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in // All characters are matched. if (length == readingHelper.getTotalCodePointCount()) { // Terminal position is found. - return nodeReader->getNodePos(); + return nodeReader->getHeadPos(); } if (!nodeReader->hasChildren()) { return NOT_A_VALID_WORD_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index cdab0e16a..50c724012 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(&mBufferWithExtendableBuffer), - mShortcutListPolicy(&mBufferWithExtendableBuffer) {} + mShortcutListPolicy(&mBufferWithExtendableBuffer), + mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -91,8 +91,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mBufferWithExtendableBuffer; - DynamicBigramListPolicy mBigramListPolicy; DynamicShortcutListPolicy mShortcutListPolicy; + DynamicBigramListPolicy mBigramListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 7dfa9ec5a..dbc80f66a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -63,7 +63,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( codePointCount - readingHelper->getTotalCodePointCount()); } // Advance to the children nodes. - parentPos = nodeReader->getNodePos(); + parentPos = nodeReader->getHeadPos(); readingHelper->readChildNode(); } if (readingHelper->isError()) { @@ -100,8 +100,9 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con } bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) { - int pos = originalNode->getNodePos(); + const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, + const int bigramLinkedNodePos) { + int pos = originalNode->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -113,18 +114,24 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, false /* isDeleted */); - int writingPos = originalNode->getNodePos(); + int writingPos = originalNode->getHeadPos(); // Update flags. if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos)) { return false; } // Update moved position, which is stored in the parent offset field. - const int movedPosOffset = movedPos - originalNode->getNodePos(); + const int movedPosOffset = movedPos - originalNode->getHeadPos(); if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( mBuffer, movedPosOffset, &writingPos)) { return false; } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } if (originalNode->hasChildren()) { // Update children's parent position. DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); @@ -248,7 +255,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } else { // Make the node terminal and write the probability. int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { + if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { return false; } if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(), @@ -268,7 +275,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( newPtNodeArrayPos, &childrenPosFieldPos)) { return false; } - return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints, + return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, codePointCount, probability); } @@ -305,11 +312,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; - const int firstPtNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) { - return false; - } - int writingPos = firstPtNodePos; + const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); + int writingPos = firstPartOfReallocatedPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; @@ -325,15 +329,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return false; } // Write the 2nd part of the reallocating node. - if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, - reallocatingPtNode->getNodePos(), + const int secondPartOfReallocatedPtNodePos = writingPos; + if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos, reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNode->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { - if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(), + if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos, newNodeCodePoints + overlappingCodePointCount, newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, &writingPos)) { @@ -344,9 +348,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } + // Update original reallocatingPtNode as moved. + if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, + secondPartOfReallocatedPtNodePos)) { + return false; + } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos); + nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos); // Update children position. int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index ada634a54..e1b9d2e75 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -54,7 +54,7 @@ class DynamicPatriciaTrieWritingHelper { DynamicShortcutListPolicy *const mShortcutPolicy; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, - const int movedPos); + const int movedPos, const int bigramLinkedNodePos); bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, diff --git a/tests/src/com/android/inputmethod/latin/InputTestsBase.java b/tests/src/com/android/inputmethod/latin/InputTestsBase.java index 2603b35f5..234bb1b31 100644 --- a/tests/src/com/android/inputmethod/latin/InputTestsBase.java +++ b/tests/src/com/android/inputmethod/latin/InputTestsBase.java @@ -259,7 +259,8 @@ public class InputTestsBase extends ServiceTestCase<LatinIMEForTests> { protected void pickSuggestionManually(final int index, final String suggestion) { mLatinIME.pickSuggestionManually(index, new SuggestedWordInfo(suggestion, 1, SuggestedWordInfo.KIND_CORRECTION, null /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); } // Helper to avoid writing the try{}catch block each time diff --git a/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java b/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java index a5f3685da..4cf83339a 100644 --- a/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java +++ b/tests/src/com/android/inputmethod/latin/SuggestedWordsTests.java @@ -35,11 +35,13 @@ public class SuggestedWordsTests extends AndroidTestCase { final ArrayList<SuggestedWordInfo> list = CollectionUtils.newArrayList(); list.add(new SuggestedWordInfo(TYPED_WORD, TYPED_WORD_FREQ, SuggestedWordInfo.KIND_TYPED, null /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); for (int i = 0; i < NUMBER_OF_ADDED_SUGGESTIONS; ++i) { list.add(new SuggestedWordInfo("" + i, 1, SuggestedWordInfo.KIND_CORRECTION, null /* sourceDict */, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */)); + SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, + SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); } final SuggestedWords words = new SuggestedWords( |