diff options
Diffstat (limited to 'native')
17 files changed, 201 insertions, 99 deletions
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp index 72e625836..386643332 100644 --- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp +++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp @@ -25,8 +25,9 @@ namespace latinime { class Dictionary; -static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring localeJStr) { - void *traverseSession = DicTraverseSession::getSessionInstance(env, localeJStr); +static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring localeJStr, + jlong dictSize) { + void *traverseSession = DicTraverseSession::getSessionInstance(env, localeJStr, dictSize); return reinterpret_cast<jlong>(traverseSession); } @@ -53,7 +54,7 @@ static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong static const JNINativeMethod sMethods[] = { { const_cast<char *>("setDicTraverseSessionNative"), - const_cast<char *>("(Ljava/lang/String;)J"), + const_cast<char *>("(Ljava/lang/String;J)J"), reinterpret_cast<void *>(latinime_setDicTraverseSession) }, { diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 07f1e52c6..4605890c7 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -32,8 +32,6 @@ #define MAX_WORD_LENGTH 48 // Must be equal to BinaryDictionary.MAX_RESULTS in Java #define MAX_RESULTS 18 -// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ... -#define MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY 310 // Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java #define MAX_PROXIMITY_CHARS_SIZE 16 #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp index c3d2a2e74..b6be47e90 100644 --- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp @@ -23,6 +23,11 @@ namespace latinime { +// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ... +const int DicNodesCache::LARGE_PRIORITY_QUEUE_CAPACITY = 310; +// Capacity for reducing memory footprint. +const int DicNodesCache::SMALL_PRIORITY_QUEUE_CAPACITY = 100; + /** * Truncates all of the dicNodes so that they start at the given commit point. * Only called for multi-word typing input. diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h index f085848aa..8493b6a8b 100644 --- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h +++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h @@ -31,10 +31,11 @@ class DicNode; */ class DicNodesCache { public: - AK_FORCE_INLINE DicNodesCache() - : mDicNodePriorityQueue0(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), - mDicNodePriorityQueue1(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), - mDicNodePriorityQueue2(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), + AK_FORCE_INLINE explicit DicNodesCache(const bool usesLargeCapacityCache) + : mUsesLargeCapacityCache(usesLargeCapacityCache), + mDicNodePriorityQueue0(getCacheCapacity()), + mDicNodePriorityQueue1(getCacheCapacity()), + mDicNodePriorityQueue2(getCacheCapacity()), mDicNodePriorityQueueForTerminal(MAX_RESULTS), mActiveDicNodes(&mDicNodePriorityQueue0), mNextActiveDicNodes(&mDicNodePriorityQueue1), @@ -50,7 +51,8 @@ class DicNodesCache { // We want to use the max capacity for the current active dic node queue. mActiveDicNodes->clearAndResizeToCapacity(); // nextActiveSize is used to limit the next iteration's active dic node size. - mNextActiveDicNodes->clearAndResize(nextActiveSize); + const int nextActiveSizeFittingToTheCapacity = min(nextActiveSize, getCacheCapacity()); + mNextActiveDicNodes->clearAndResize(nextActiveSizeFittingToTheCapacity); mTerminalDicNodes->clearAndResize(terminalSize); // We want to use the max capacity for the cached dic nodes that will be used for the // continuous suggestion. @@ -162,12 +164,21 @@ class DicNodesCache { return tmp; } + AK_FORCE_INLINE int getCacheCapacity() const { + return mUsesLargeCapacityCache ? + LARGE_PRIORITY_QUEUE_CAPACITY : SMALL_PRIORITY_QUEUE_CAPACITY; + } + AK_FORCE_INLINE void resetTemporaryCaches() { mActiveDicNodes->clear(); mNextActiveDicNodes->clear(); mTerminalDicNodes->clear(); } + static const int LARGE_PRIORITY_QUEUE_CAPACITY; + static const int SMALL_PRIORITY_QUEUE_CAPACITY; + + const bool mUsesLargeCapacityCache; // Instances DicNodePriorityQueue mDicNodePriorityQueue0; DicNodePriorityQueue mDicNodePriorityQueue1; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index e7b386b2d..2c2259214 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -23,6 +23,11 @@ namespace latinime { +// 256K bytes threshold is heuristically used to distinguish dictionaries containing many unigrams +// (e.g. main dictionary) from small dictionaries (e.g. contacts...) +const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION = + 256 * 1024; + void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index b25580b96..fe8893590 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -37,8 +37,12 @@ class DicTraverseSession { public: // A factory method for DicTraverseSession - static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr) { - return new DicTraverseSession(env, localeStr); + static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr, + jlong dictSize) { + // To deal with the trade-off between accuracy and memory space, large cache is used for + // dictionaries larger that the threshold + return new DicTraverseSession(env, localeStr, + dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION); } static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession, @@ -54,10 +58,10 @@ class DicTraverseSession { delete traverseSession; } - AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr) + AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) : mPrevWordPos(NOT_A_VALID_WORD_POS), mProximityInfo(0), - mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(), - mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), + mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), + mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { // NOTE: mProximityInfoStates is an array of instances. // No need to initialize it explicitly here. @@ -181,6 +185,7 @@ class DicTraverseSession { DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseSession); // threshold to start caching static const int CACHE_START_INPUT_LENGTH_THRESHOLD; + static const int DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION; void initializeProximityInfoStates(const int *const inputCodePoints, const int *const inputXs, const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 936dc9c5d..4ee138125 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -18,6 +18,42 @@ namespace latinime { +const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000; + +void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + const BigramListReadWriteUtils::BigramFlags flags = + BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + buffer, flags, pos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); + *outHasNext = BigramListReadWriteUtils::hasNext(flags); + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } +} + +void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } +} + bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); if (usesAdditionalBuffer) { @@ -28,15 +64,16 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo // The buffer address can be changed after calling buffer writing methods. const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos); - int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, fromPos); - if (bigramPos == NOT_A_VALID_WORD_POS) { + if (originalBigramPos == NOT_A_VALID_WORD_POS) { // skip invalid bigram entry. continue; } if (usesAdditionalBuffer) { - bigramPos += mBuffer->getOriginalBufferSize(); + originalBigramPos += mBuffer->getOriginalBufferSize(); } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); BigramListReadWriteUtils::BigramFlags newBigramFlags; uint32_t newBigramOffset; int newBigramOffsetFieldSize; @@ -133,11 +170,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta if (usesAdditionalBuffer) { bigramOffsetFieldPos += mBuffer->getOriginalBufferSize(); } - int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, &pos); - if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) { - bigramPos += mBuffer->getOriginalBufferSize(); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); if (bigramPos != targetBigramPos) { continue; } @@ -152,4 +190,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta return false; } +int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( + const int originalBigramPos) const { + if (originalBigramPos == NOT_A_VALID_WORD_POS) { + return NOT_A_VALID_WORD_POS; + } + int currentPos = originalBigramPos; + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); + nodeReader.fetchNodeInfoFromBuffer(currentPos); + int bigramLinkCount = 0; + while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { + currentPos = nodeReader.getBigramLinkedNodePos(); + nodeReader.fetchNodeInfoFromBuffer(currentPos); + bigramLinkCount++; + if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) { + AKLOGI("Bigram link is invalid. start position: %d", bigramPos); + ASSERT(false); + return NOT_A_VALID_WORD_POS; + } + } + return currentPos; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index b7c05376d..e451e313d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,7 +21,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -31,43 +33,16 @@ namespace latinime { */ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { public: - DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer) - : mBuffer(buffer) {} + DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy) + : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {} ~DynamicBigramListPolicy() {} void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, - int *const pos) const { - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); - const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - *pos -= mBuffer->getOriginalBufferSize(); - } - const BigramListReadWriteUtils::BigramFlags flags = - BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); - *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( - buffer, flags, pos); - if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) { - *outBigramPos += mBuffer->getOriginalBufferSize(); - } - *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); - *outHasNext = BigramListReadWriteUtils::hasNext(flags); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); - } - } + int *const pos) const; - void skipAllBigrams(int *const pos) const { - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); - const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - *pos -= mBuffer->getOriginalBufferSize(); - } - BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); - } - } + void skipAllBigrams(int *const pos) const; // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these // positions after bigram lists. This method skips invalid bigram entries. @@ -81,7 +56,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); + static const int BIGRAM_LINK_COUNT_LIMIT; + BufferWithExtendableBuffer *const mBuffer; + const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; + + // Follow bigram link and return the position of bigram target PtNode that is currently valid. + int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 5674cb48e..56ef60ae4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -28,6 +28,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); int pos = nodePos; + mHeadPos = nodePos; if (usesAdditionalBuffer) { pos -= mBuffer->getOriginalBufferSize(); } @@ -57,10 +58,15 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c mChildrenPosFieldPos += mBuffer->getOriginalBufferSize(); } mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictBuf, mFlags, &pos); + dictBuf, &pos); if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mBuffer->getOriginalBufferSize(); } + if (mSiblingPos == NOT_A_VALID_WORD_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + mBigramLinkedNodePos = mChildrenPos; + } else { + mBigramLinkedNodePos = NOT_A_DICT_POS; + } if (usesAdditionalBuffer) { pos += mBuffer->getOriginalBufferSize(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 2ee7c2495..89d38a590 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -39,11 +39,11 @@ class DynamicPatriciaTrieNodeReader { const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), - mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), - mParentPos(NOT_A_DICT_POS), mCodePointCount(0), - mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), - mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_VALID_WORD_POS), mFlags(0), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), + mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} ~DynamicPatriciaTrieNodeReader() {} @@ -56,13 +56,14 @@ class DynamicPatriciaTrieNodeReader { AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - mNodePos = nodePos; mSiblingPos = NOT_A_VALID_WORD_POS; - fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints); + mBigramLinkedNodePos = NOT_A_DICT_POS; + fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints); } - AK_FORCE_INLINE int getNodePos() const { - return mNodePos; + // HeadPos is different from NodePos when the current PtNode is a moved PtNode. + AK_FORCE_INLINE int getHeadPos() const { + return mHeadPos; } // Flags @@ -114,6 +115,11 @@ class DynamicPatriciaTrieNodeReader { return mChildrenPos; } + // Bigram linked node position. + AK_FORCE_INLINE int getBigramLinkedNodePos() const { + return mBigramLinkedNodePos; + } + // Shortcutlist position AK_FORCE_INLINE int getShortcutPos() const { return mShortcutPos; @@ -135,7 +141,7 @@ class DynamicPatriciaTrieNodeReader { const BufferWithExtendableBuffer *const mBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - int mNodePos; + int mHeadPos; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; int mParentPos; uint8_t mCodePointCount; @@ -143,6 +149,7 @@ class DynamicPatriciaTrieNodeReader { int mProbability; int mChildrenPosFieldPos; int mChildrenPos; + int mBigramLinkedNodePos; int mShortcutPos; int mBigramPos; int mSiblingPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 945677b50..ece1781f1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { - childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(), + childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), nodeReader->getChildrenPos(), nodeReader->getProbability(), nodeReader->isTerminal() && !nodeReader->isDeleted(), nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), @@ -122,7 +122,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in // All characters are matched. if (length == readingHelper.getTotalCodePointCount()) { // Terminal position is found. - return nodeReader->getNodePos(); + return nodeReader->getHeadPos(); } if (!nodeReader->hasChildren()) { return NOT_A_VALID_WORD_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index cdab0e16a..50c724012 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(&mBufferWithExtendableBuffer), - mShortcutListPolicy(&mBufferWithExtendableBuffer) {} + mShortcutListPolicy(&mBufferWithExtendableBuffer), + mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -91,8 +91,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mBufferWithExtendableBuffer; - DynamicBigramListPolicy mBigramListPolicy; DynamicShortcutListPolicy mShortcutListPolicy; + DynamicBigramListPolicy mBigramListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index 5d979fa51..c7e89fff8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -29,18 +29,14 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80; /* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition( - const uint8_t *const buffer, const NodeFlags flags, int *const pos) { - if ((flags & MASK_MOVED) == FLAG_IS_NOT_MOVED) { - const int base = *pos; - const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); - if (offset == 0) { - // 0 offset means that the node does not have children. - return NOT_A_DICT_POS; - } else { - return base + offset; - } - } else { + const uint8_t *const buffer, int *const pos) { + const int base = *pos; + const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); + if (offset == 0) { + // 0 offset means that the node does not have children. return NOT_A_DICT_POS; + } else { + return base + offset; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index 2e604a202..5a2ad9cb9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -42,8 +42,7 @@ class DynamicPatriciaTrieReadingUtils { return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); } - static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, - const NodeFlags flags, int *const pos); + static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos); /** * Node Flags diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 7c0b6286c..dbc80f66a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -63,7 +63,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( codePointCount - readingHelper->getTotalCodePointCount()); } // Advance to the children nodes. - parentPos = nodeReader->getNodePos(); + parentPos = nodeReader->getHeadPos(); readingHelper->readChildNode(); } if (readingHelper->isError()) { @@ -100,8 +100,9 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con } bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) { - int pos = originalNode->getNodePos(); + const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, + const int bigramLinkedNodePos) { + int pos = originalNode->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -113,18 +114,42 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, false /* isDeleted */); - int writingPos = originalNode->getNodePos(); + int writingPos = originalNode->getHeadPos(); // Update flags. if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos)) { return false; } // Update moved position, which is stored in the parent offset field. - const int movedPosOffset = movedPos - originalNode->getNodePos(); + const int movedPosOffset = movedPos - originalNode->getHeadPos(); if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( mBuffer, movedPosOffset, &writingPos)) { return false; } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } + if (originalNode->hasChildren()) { + // Update children's parent position. + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); + const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); + readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos()); + while (!readingHelper.isEnd()) { + const int childPtNodeWrittenPos = nodeReader->getHeadPos(); + const int parentOffset = movedPos - childPtNodeWrittenPos; + int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */; + if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( + mBuffer, parentOffset, &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. + return false; + } + readingHelper.readNextSiblingNode(); + } + } return true; } @@ -230,7 +255,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } else { // Make the node terminal and write the probability. int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { + if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { return false; } if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(), @@ -250,7 +275,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( newPtNodeArrayPos, &childrenPosFieldPos)) { return false; } - return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints, + return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, codePointCount, probability); } @@ -287,11 +312,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; - const int firstPtNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) { - return false; - } - int writingPos = firstPtNodePos; + const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); + int writingPos = firstPartOfReallocatedPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; @@ -307,15 +329,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return false; } // Write the 2nd part of the reallocating node. - if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, - reallocatingPtNode->getNodePos(), + const int secondPartOfReallocatedPtNodePos = writingPos; + if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos, reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNode->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { - if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(), + if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos, newNodeCodePoints + overlappingCodePointCount, newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, &writingPos)) { @@ -326,9 +348,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } + // Update original reallocatingPtNode as moved. + if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, + secondPartOfReallocatedPtNodePos)) { + return false; + } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos); + nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos); // Update children position. int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index ada634a54..e1b9d2e75 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -54,7 +54,7 @@ class DynamicPatriciaTrieWritingHelper { DynamicShortcutListPolicy *const mShortcutPolicy; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, - const int movedPos); + const int movedPos, const int bigramLinkedNodePos); bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index f727ecf8e..6bafb64ee 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -172,6 +172,7 @@ class ByteArrayUtils { int codePoint = readCodePointAndAdvancePosition(buffer, pos); while (NOT_A_CODE_POINT != codePoint && length < maxLength) { codePoint = readCodePointAndAdvancePosition(buffer, pos); + length++; } return length; } |