diff options
Diffstat (limited to 'native/jni/src')
43 files changed, 714 insertions, 379 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 34a646f80..89dfa39b3 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -292,7 +292,6 @@ static inline void prof_out(void) { // of the binary dictionary where a {key,value} string pair scheme is used. #define LARGEST_INT_DIGIT_COUNT 11 -#define NOT_A_VALID_WORD_POS (-99) #define NOT_A_CODE_POINT (-1) #define NOT_A_DISTANCE (-1) #define NOT_A_COORDINATE (-1) @@ -322,13 +321,6 @@ static inline void prof_out(void) { #define MAX_POINTER_COUNT 1 #define MAX_POINTER_COUNT_G 2 -// Queue IDs and size for DicNodesCache -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE 0 -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE 1 -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL 2 -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION 3 -#define DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE 4 - template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; } template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; } diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index cdd9f59aa..41ef9d2b2 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -112,7 +112,7 @@ class DicNode { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -125,7 +125,7 @@ class DicNode { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -143,7 +143,7 @@ class DicNode { dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), dicNode->mDicNodeProperties.getDepth(), - dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevSpacePositions, + dicNode->mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(), mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } @@ -234,7 +234,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_VALID_WORD_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -321,8 +321,13 @@ class DicNode { DUMP_WORD_AND_SCORE("OUTPUT"); } - void outputSpacePositionsResult(int *spaceIndices) const { - mDicNodeState.mDicNodeStatePrevWord.outputSpacePositions(spaceIndices); + int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { + const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); + if (inputIndex == NOT_AN_INDEX) { + return NOT_AN_INDEX; + } else { + return pInfoState->getInputIndexOfSampledPoint(inputIndex); + } } bool hasMultipleWords() const { @@ -573,7 +578,11 @@ class DicNode { } } - AK_FORCE_INLINE void updateInputIndexG(DicNode_InputStateG *inputStateG) { + AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) { + if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) { + mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex( + inputStateG->mInputIndex); + } mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId, inputStateG->mInputIndex, inputStateG->mPrevCodePoint, inputStateG->mTerminalDiffCost, inputStateG->mRawLength); diff --git a/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h b/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h index 2a486b804..7461f0cc6 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h @@ -24,20 +24,16 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_release_listener.h" -// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ... -#define MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY 310 - namespace latinime { class DicNodePriorityQueue : public DicNodeReleaseListener { public: - AK_FORCE_INLINE DicNodePriorityQueue() - : MAX_CAPACITY(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), - mMaxSize(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), mDicNodesBuf(), mUnusedNodeIndices(), - mNextUnusedNodeId(0), mDicNodesQueue() { - mDicNodesBuf.resize(MAX_CAPACITY + 1); - mUnusedNodeIndices.resize(MAX_CAPACITY + 1); - reset(); + AK_FORCE_INLINE explicit DicNodePriorityQueue(const int capacity) + : mCapacity(capacity), mMaxSize(capacity), mDicNodesBuf(), + mUnusedNodeIndices(), mNextUnusedNodeId(0), mDicNodesQueue() { + mDicNodesBuf.resize(mCapacity + 1); + mUnusedNodeIndices.resize(mCapacity + 1); + clearAndResizeToCapacity(); } // Non virtual inline destructor -- never inherit this class @@ -52,11 +48,12 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { } AK_FORCE_INLINE void setMaxSize(const int maxSize) { - mMaxSize = min(maxSize, MAX_CAPACITY); + ASSERT(maxSize <= mCapacity); + mMaxSize = min(maxSize, mCapacity); } - AK_FORCE_INLINE void reset() { - clearAndResize(MAX_CAPACITY); + AK_FORCE_INLINE void clearAndResizeToCapacity() { + clearAndResize(mCapacity); } AK_FORCE_INLINE void clear() { @@ -64,27 +61,19 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { } AK_FORCE_INLINE void clearAndResize(const int maxSize) { + ASSERT(maxSize <= mCapacity); while (!mDicNodesQueue.empty()) { mDicNodesQueue.pop(); } setMaxSize(maxSize); - for (int i = 0; i < MAX_CAPACITY + 1; ++i) { + for (int i = 0; i < mCapacity + 1; ++i) { mDicNodesBuf[i].remove(); mDicNodesBuf[i].setReleaseListener(this); - mUnusedNodeIndices[i] = i == MAX_CAPACITY ? NOT_A_NODE_ID : static_cast<int>(i) + 1; + mUnusedNodeIndices[i] = i == mCapacity ? NOT_A_NODE_ID : static_cast<int>(i) + 1; } mNextUnusedNodeId = 0; } - AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) { - DicNode *newNode = searchEmptyDicNode(); - if (newNode) { - DicNodeUtils::initByCopy(dicNode, newNode); - return newNode; - } - return 0; - } - // Copy AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode) { return copyPush(dicNode, mMaxSize); @@ -111,12 +100,12 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { } mUnusedNodeIndices[index] = mNextUnusedNodeId; mNextUnusedNodeId = index; - ASSERT(index >= 0 && index < (MAX_CAPACITY + 1)); + ASSERT(index >= 0 && index < (mCapacity + 1)); } AK_FORCE_INLINE void dump() const { AKLOGI("\n\n\n\n\n==========================="); - for (int i = 0; i < MAX_CAPACITY + 1; ++i) { + for (int i = 0; i < mCapacity + 1; ++i) { if (mDicNodesBuf[i].isUsed()) { mDicNodesBuf[i].dump("QUEUE: "); } @@ -125,7 +114,7 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { } private: - DISALLOW_COPY_AND_ASSIGN(DicNodePriorityQueue); + DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodePriorityQueue); static const int NOT_A_NODE_ID = -1; AK_FORCE_INLINE static bool compareDicNode(DicNode *left, DicNode *right) { @@ -139,7 +128,7 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { }; typedef std::priority_queue<DicNode *, std::vector<DicNode *>, DicNodeComparator> DicNodesQueue; - const int MAX_CAPACITY; + const int mCapacity; int mMaxSize; std::vector<DicNode> mDicNodesBuf; // of each element of mDicNodesBuf respectively std::vector<int> mUnusedNodeIndices; @@ -163,13 +152,12 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { } AK_FORCE_INLINE DicNode *searchEmptyDicNode() { - // TODO: Currently O(n) but should be improved to O(1) - if (MAX_CAPACITY == 0) { + if (mCapacity == 0) { return 0; } if (mNextUnusedNodeId == NOT_A_NODE_ID) { AKLOGI("No unused node found."); - for (int i = 0; i < MAX_CAPACITY + 1; ++i) { + for (int i = 0; i < mCapacity + 1; ++i) { AKLOGI("Dump node availability, %d, %d, %d", i, mDicNodesBuf[i].isUsed(), mUnusedNodeIndices[i]); } @@ -185,7 +173,7 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]); mNextUnusedNodeId = mUnusedNodeIndices[index]; mUnusedNodeIndices[index] = NOT_A_NODE_ID; - ASSERT(index >= 0 && index < (MAX_CAPACITY + 1)); + ASSERT(index >= 0 && index < (mCapacity + 1)); } AK_FORCE_INLINE DicNode *pushPoolNodeWithMaxSize(DicNode *dicNode, const int maxSize) { @@ -209,6 +197,15 @@ class DicNodePriorityQueue : public DicNodeReleaseListener { AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode, const int maxSize) { return pushPoolNodeWithMaxSize(newDicNode(dicNode), maxSize); } + + AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) { + DicNode *newNode = searchEmptyDicNode(); + if (newNode) { + DicNodeUtils::initByCopy(dicNode, newNode); + } + return newNode; + } + }; } // namespace latinime #endif // LATINIME_DIC_NODE_PRIORITY_QUEUE_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index e81591992..ec65114c7 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -89,7 +89,7 @@ namespace latinime { const int unigramProbability = node->getProbability(); const int wordPos = node->getPos(); const int prevWordPos = node->getPrevWordPos(); - if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) { + if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp index c3d2a2e74..b6be47e90 100644 --- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp @@ -23,6 +23,11 @@ namespace latinime { +// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ... +const int DicNodesCache::LARGE_PRIORITY_QUEUE_CAPACITY = 310; +// Capacity for reducing memory footprint. +const int DicNodesCache::SMALL_PRIORITY_QUEUE_CAPACITY = 100; + /** * Truncates all of the dicNodes so that they start at the given commit point. * Only called for multi-word typing input. diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h index 7aab0906e..8493b6a8b 100644 --- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h +++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h @@ -31,25 +31,32 @@ class DicNode; */ class DicNodesCache { public: - AK_FORCE_INLINE DicNodesCache() - : mActiveDicNodes(&mDicNodePriorityQueues[DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE]), - mNextActiveDicNodes(&mDicNodePriorityQueues[ - DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE]), - mTerminalDicNodes(&mDicNodePriorityQueues[DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL]), - mCachedDicNodesForContinuousSuggestion(&mDicNodePriorityQueues[ - DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION]), - mInputIndex(0), mLastCachedInputIndex(0) { - } + AK_FORCE_INLINE explicit DicNodesCache(const bool usesLargeCapacityCache) + : mUsesLargeCapacityCache(usesLargeCapacityCache), + mDicNodePriorityQueue0(getCacheCapacity()), + mDicNodePriorityQueue1(getCacheCapacity()), + mDicNodePriorityQueue2(getCacheCapacity()), + mDicNodePriorityQueueForTerminal(MAX_RESULTS), + mActiveDicNodes(&mDicNodePriorityQueue0), + mNextActiveDicNodes(&mDicNodePriorityQueue1), + mCachedDicNodesForContinuousSuggestion(&mDicNodePriorityQueue2), + mTerminalDicNodes(&mDicNodePriorityQueueForTerminal), + mInputIndex(0), mLastCachedInputIndex(0) {} AK_FORCE_INLINE virtual ~DicNodesCache() {} AK_FORCE_INLINE void reset(const int nextActiveSize, const int terminalSize) { mInputIndex = 0; mLastCachedInputIndex = 0; - mActiveDicNodes->reset(); - mNextActiveDicNodes->clearAndResize(nextActiveSize); + // We want to use the max capacity for the current active dic node queue. + mActiveDicNodes->clearAndResizeToCapacity(); + // nextActiveSize is used to limit the next iteration's active dic node size. + const int nextActiveSizeFittingToTheCapacity = min(nextActiveSize, getCacheCapacity()); + mNextActiveDicNodes->clearAndResize(nextActiveSizeFittingToTheCapacity); mTerminalDicNodes->clearAndResize(terminalSize); - mCachedDicNodesForContinuousSuggestion->reset(); + // We want to use the max capacity for the cached dic nodes that will be used for the + // continuous suggestion. + mCachedDicNodesForContinuousSuggestion->clearAndResizeToCapacity(); } AK_FORCE_INLINE void continueSearch() { @@ -157,21 +164,35 @@ class DicNodesCache { return tmp; } + AK_FORCE_INLINE int getCacheCapacity() const { + return mUsesLargeCapacityCache ? + LARGE_PRIORITY_QUEUE_CAPACITY : SMALL_PRIORITY_QUEUE_CAPACITY; + } + AK_FORCE_INLINE void resetTemporaryCaches() { mActiveDicNodes->clear(); mNextActiveDicNodes->clear(); mTerminalDicNodes->clear(); } - DicNodePriorityQueue mDicNodePriorityQueues[DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE]; + static const int LARGE_PRIORITY_QUEUE_CAPACITY; + static const int SMALL_PRIORITY_QUEUE_CAPACITY; + + const bool mUsesLargeCapacityCache; + // Instances + DicNodePriorityQueue mDicNodePriorityQueue0; + DicNodePriorityQueue mDicNodePriorityQueue1; + DicNodePriorityQueue mDicNodePriorityQueue2; + DicNodePriorityQueue mDicNodePriorityQueueForTerminal; + // Active dicNodes currently being expanded. DicNodePriorityQueue *mActiveDicNodes; // Next dicNodes to be expanded. DicNodePriorityQueue *mNextActiveDicNodes; - // Current top terminal dicNodes. - DicNodePriorityQueue *mTerminalDicNodes; // Cached dicNodes used for continuous suggestion. DicNodePriorityQueue *mCachedDicNodesForContinuousSuggestion; + // Current top terminal dicNodes. + DicNodePriorityQueue *mTerminalDicNodes; int mInputIndex; int mLastCachedInputIndex; }; diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index 9bc96877e..b8986203d 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/layout/proximity_info_state.h" namespace latinime { @@ -29,9 +30,8 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_VALID_WORD_POS) { + mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); - memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); } virtual ~DicNodeStatePrevWord() {} @@ -41,8 +41,8 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = NOT_A_VALID_WORD_POS; - memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); + mPrevWordNodePos = NOT_A_DICT_POS; + mSecondWordFirstInputIndex = NOT_AN_INDEX; } void init(const int prevWordNodePos) { @@ -51,7 +51,7 @@ class DicNodeStatePrevWord { mPrevWordStart = 0; mPrevWordProbability = -1; mPrevWordNodePos = prevWordNodePos; - memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); + mSecondWordFirstInputIndex = NOT_AN_INDEX; } // Init by copy @@ -61,14 +61,14 @@ class DicNodeStatePrevWord { mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; mPrevWordNodePos = prevWord->mPrevWordNodePos; + mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); - memcpy(mPrevSpacePositions, prevWord->mPrevSpacePositions, sizeof(mPrevSpacePositions)); } void init(const int16_t prevWordCount, const int16_t prevWordProbability, const int prevWordNodePos, const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, const int *const prevSpacePositions, - const int lastInputIndex) { + const int *const src1, const int16_t length1, + const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; mPrevWordNodePos = prevWordNodePos; @@ -80,8 +80,7 @@ class DicNodeStatePrevWord { mPrevWord[twoWordsLen] = KEYCODE_SPACE; mPrevWordStart = length0; mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1); - memcpy(mPrevSpacePositions, prevSpacePositions, sizeof(mPrevSpacePositions)); - mPrevSpacePositions[mPrevWordCount - 1] = lastInputIndex; + mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex; } void truncate(const int offset) { @@ -96,11 +95,12 @@ class DicNodeStatePrevWord { mPrevWordLength = newPrevWordLength; } - void outputSpacePositions(int *spaceIndices) const { - // Convert uint16_t to int - for (int i = 0; i < MAX_RESULTS; i++) { - spaceIndices[i] = mPrevSpacePositions[i]; - } + void setSecondWordFirstInputIndex(const int inputIndex) { + mSecondWordFirstInputIndex = inputIndex; + } + + int getSecondWordFirstInputIndex() const { + return mSecondWordFirstInputIndex; } // TODO: remove @@ -138,8 +138,6 @@ class DicNodeStatePrevWord { // TODO: Move to private int mPrevWord[MAX_WORD_LENGTH]; - // TODO: Move to private - int mPrevSpacePositions[MAX_RESULTS]; private: // Caution!!! @@ -150,6 +148,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordStart; int16_t mPrevWordProbability; int mPrevWordNodePos; + int mSecondWordFirstInputIndex; }; } // namespace latinime #endif // LATINIME_DIC_NODE_STATE_PREVWORD_H diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index cf1cd8815..425b07624 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -116,7 +116,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) { + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { continue; } const int codePointCount = mDictionaryStructurePolicy-> @@ -146,7 +146,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in if (0 >= prevWordLength) return NOT_A_DICT_POS; int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); - if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; + if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); } @@ -157,7 +157,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w if (NOT_A_DICT_POS == pos) return false; int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (NOT_A_VALID_WORD_POS == nextWordPos) return false; + if (NOT_A_DICT_POS == nextWordPos) return false; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 02ece639c..033572201 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -87,7 +87,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq int Dictionary::getProbability(const int *word, int length) const { int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); - if (NOT_A_VALID_WORD_POS == pos) { + if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; } return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); @@ -112,6 +112,18 @@ void Dictionary::removeBigramWords(const int *const word0, const int length0, mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); } +void Dictionary::flush(const char *const filePath) { + mDictionaryStructureWithBufferPolicy->flush(filePath); +} + +void Dictionary::flushWithGC(const char *const filePath) { + mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); +} + +bool Dictionary::needsToRunGC() { + return mDictionaryStructureWithBufferPolicy->needsToRunGC(); +} + void Dictionary::logDictionaryInfo(JNIEnv *const env) const { const int BUFFER_SIZE = 16; int dictionaryIdCodePointBuffer[BUFFER_SIZE]; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 0afe5a54b..06e84bbfe 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -77,6 +77,12 @@ class Dictionary { void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC(); + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy; } diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 9efe5f6f9..aecf41386 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -73,7 +73,7 @@ class MultiBigramMap { bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) { + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { continue; } mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index 7780efdfd..fbabd92f2 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -36,8 +36,8 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const int *const xCoordinates, const int *const yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric) { ASSERT(isGeometric || (inputSize < MAX_WORD_LENGTH)); - mIsContinuousSuggestionPossible = - ProximityInfoStateUtils::checkAndReturnIsContinuousSuggestionPossible( + mIsContinuousSuggestionPossible = (mHasBeenUpdatedByGeometricInput != isGeometric) ? + false : ProximityInfoStateUtils::checkAndReturnIsContinuousSuggestionPossible( inputSize, xCoordinates, yCoordinates, times, mSampledInputSize, &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledInputIndice); if (DEBUG_DICT) { @@ -155,6 +155,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi if (DEBUG_GEO_FULL) { AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize); } + mHasBeenUpdatedByGeometricInput = isGeometric; } // This function basically converts from a length to an edit distance. Accordingly, it's obviously diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index dbcd54488..c94060fa9 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -46,10 +46,11 @@ class ProximityInfoState { : mProximityInfo(0), mMaxPointToKeyLength(0.0f), mAverageSpeed(0.0f), mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0), - mIsContinuousSuggestionPossible(false), mSampledInputXs(), mSampledInputYs(), - mSampledTimes(), mSampledInputIndice(), mSampledLengthCache(), - mBeelineSpeedPercentiles(), mSampledNormalizedSquaredLengthCache(), mSpeedRates(), - mDirections(), mCharProbabilities(), mSampledNearKeySets(), mSampledSearchKeySets(), + mIsContinuousSuggestionPossible(false), mHasBeenUpdatedByGeometricInput(false), + mSampledInputXs(), mSampledInputYs(), mSampledTimes(), mSampledInputIndice(), + mSampledLengthCache(), mBeelineSpeedPercentiles(), + mSampledNormalizedSquaredLengthCache(), mSpeedRates(), mDirections(), + mCharProbabilities(), mSampledNearKeySets(), mSampledSearchKeySets(), mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false), mSampledInputSize(0), mMostProbableStringProbability(0.0f) { memset(mInputProximities, 0, sizeof(mInputProximities)); @@ -129,6 +130,10 @@ class ProximityInfoState { return mSampledInputYs[index]; } + int getInputIndexOfSampledPoint(const int sampledIndex) const { + return mSampledInputIndice[sampledIndex]; + } + bool hasSpaceProximity(const int index) const; int getLengthCache(const int index) const { @@ -204,6 +209,7 @@ class ProximityInfoState { int mGridHeight; int mGridWidth; bool mIsContinuousSuggestionPossible; + bool mHasBeenUpdatedByGeometricInput; std::vector<int> mSampledInputXs; std::vector<int> mSampledInputYs; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index c8cbbcfdf..24d1b8ba1 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -74,6 +74,12 @@ class DictionaryStructureWithBufferPolicy { virtual bool removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) = 0; + virtual void flush(const char *const filePath) = 0; + + virtual void flushWithGC(const char *const filePath) = 0; + + virtual bool needsToRunGC() const = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 0ca583f90..50f2bbd8d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -23,6 +23,11 @@ namespace latinime { +// 256K bytes threshold is heuristically used to distinguish dictionaries containing many unigrams +// (e.g. main dictionary) from small dictionaries (e.g. contacts...) +const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION = + 256 * 1024; + void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; @@ -30,13 +35,13 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPos = NOT_A_VALID_WORD_POS; + mPrevWordPos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPos == NOT_A_VALID_WORD_POS) { + if (mPrevWordPos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( @@ -59,8 +64,9 @@ const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStru return mDictionary->getDictionaryStructurePolicy(); } -void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) { - mDicNodesCache.reset(nextActiveCacheSize, maxWords); +void DicTraverseSession::resetCache(const int thresholdForNextActiveDicNodes, const int maxWords) { + mDicNodesCache.reset(thresholdForNextActiveDicNodes /* nextActiveSize */, + maxWords /* terminalSize */); mMultiBigramMap.clear(); mPartiallyCommited = false; } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 23de5cc65..e0b1c67d9 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -37,8 +37,12 @@ class DicTraverseSession { public: // A factory method for DicTraverseSession - static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr) { - return new DicTraverseSession(env, localeStr); + static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr, + jlong dictSize) { + // To deal with the trade-off between accuracy and memory space, large cache is used for + // dictionaries larger that the threshold + return new DicTraverseSession(env, localeStr, + dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION); } static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession, @@ -54,10 +58,10 @@ class DicTraverseSession { delete traverseSession; } - AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr) - : mPrevWordPos(NOT_A_VALID_WORD_POS), mProximityInfo(0), - mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(), - mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), + AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) + : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), + mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), + mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { // NOTE: mProximityInfoStates is an array of instances. // No need to initialize it explicitly here. @@ -73,7 +77,7 @@ class DicTraverseSession { const int inputSize, const int *const inputXs, const int *const inputYs, const int *const times, const int *const pointerIds, const float maxSpatialDistance, const int maxPointerCount); - void resetCache(const int nextActiveCacheSize, const int maxWords); + void resetCache(const int thresholdForNextActiveDicNodes, const int maxWords); const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const; @@ -109,7 +113,9 @@ class DicTraverseSession { if (usedPointerCount != 1) { return false; } - *pointerId = usedPointerId; + if (pointerId) { + *pointerId = usedPointerId; + } return true; } @@ -181,6 +187,7 @@ class DicTraverseSession { DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseSession); // threshold to start caching static const int CACHE_START_INPUT_LENGTH_THRESHOLD; + static const int DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION; void initializeProximityInfoStates(const int *const inputCodePoints, const int *const inputXs, const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index e788e914a..0c925be25 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -117,7 +117,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo * Outputs the final list of suggestions (i.e., terminal nodes). */ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, - int *outputCodePoints, int *spaceIndices, int *outputTypes) const { + int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const { #if DEBUG_EVALUATE_MOST_PROBABLE_STRING const int terminalSize = 0; #else @@ -139,6 +139,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, &outputCodePoints[0], &outputTypes[0], &frequencies[0]); if (hasMostProbableString) { + outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; ++outputWordIndex; } @@ -160,6 +161,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT && terminals[0].hasMultipleWords())) : false; + // TODO: have partial commit work even with multiple pointers. + const bool outputSecondWordFirstLetterInputIndex = + traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; ++terminalIndex) { @@ -194,18 +198,21 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen terminalDicNode->isExactMatch() || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) || (isValidWord && SCORING->doesAutoCorrectValidWord())); - maxScore = max(maxScore, finalScore); - - // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. - // Index for top typing suggestion should be 0. - if (isValidWord && outputWordIndex == 0) { - terminalDicNode->outputSpacePositionsResult(spaceIndices); + if (maxScore < finalScore && isValidWord) { + maxScore = finalScore; } // Don't output invalid words. However, we still need to submit their shortcuts if any. if (isValidWord) { outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; frequencies[outputWordIndex] = finalScore; + if (outputSecondWordFirstLetterInputIndex) { + outputIndicesToPartialCommit[outputWordIndex] = + terminalDicNode->getSecondWordFirstInputIndex( + traverseSession->getProximityInfoState(0)); + } else { + outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; + } // Populate the outputChars array with the suggested word. const int startIndex = outputWordIndex * MAX_WORD_LENGTH; terminalDicNode->outputResult(&outputCodePoints[startIndex]); @@ -220,8 +227,19 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); - outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex, - finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); + const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, + outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, + sameAsTyped); + const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex( + traverseSession->getProximityInfoState(0)); + for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) { + if (outputSecondWordFirstLetterInputIndex) { + outputIndicesToPartialCommit[i] = secondWordFirstInputIndex; + } else { + outputIndicesToPartialCommit[i] = NOT_AN_INDEX; + } + } + outputWordIndex = updatedOutputWordIndex; } DicNode::managedDelete(terminalDicNode); } diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 875cbe4e0..b24019632 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -55,7 +55,7 @@ class Suggest : public SuggestInterface { void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, const bool spaceSubstitution) const; int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, - int *outputCodePoints, int *outputIndices, int *outputTypes) const; + int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const; void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index d57547445..09e832f07 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -38,6 +38,22 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; +/* static */ BigramListReadWriteUtils::BigramFlags + BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); +} + +/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, + int *const pos) { + BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos); + while (hasNext(flags)) { + *pos += attributeAddressSize(flags); + flags = getFlagsAndForwardPointer(bigramsBuf, pos); + } + *pos += attributeAddressSize(flags); +} + /* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer( const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) { int offset = 0; @@ -54,7 +70,7 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; break; } if (offset == 0) { - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } if (isOffsetNegative(flags)) { return origin - offset; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index ee2b722a4..884bcd7a9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -21,7 +21,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -29,10 +28,7 @@ class BigramListReadWriteUtils { public: typedef uint8_t BigramFlags; - static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer( - const uint8_t *const bigramsBuf, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); - } + static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos); static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { return flags & MASK_ATTRIBUTE_PROBABILITY; @@ -43,15 +39,7 @@ public: } // Bigrams reading methods - static AK_FORCE_INLINE void skipExistingBigrams(const uint8_t *const bigramsBuf, - int *const pos) { - BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos); - while (hasNext(flags)) { - *pos += attributeAddressSize(flags); - flags = getFlagsAndForwardPointer(bigramsBuf, pos); - } - *pos += attributeAddressSize(flags); - } + static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos); static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos); @@ -74,12 +62,17 @@ public: return flags | FLAG_ATTRIBUTE_HAS_NEXT; } + static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags, + const int probability) { + return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY); + } + // Returns true if the bigram entry is valid and put entry values into out*. static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( const int entryPos, const int targetPos, const int probability, const bool hasNext, BigramFlags *const outBigramFlags, uint32_t *const outOffset, int *const outOffsetFieldSize) { - if (targetPos == NOT_A_VALID_WORD_POS) { + if (targetPos == NOT_A_DICT_POS) { return false; } BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 936dc9c5d..4c44d22fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -18,25 +18,64 @@ namespace latinime { -bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) { +const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000; + +void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + const BigramListReadWriteUtils::BigramFlags flags = + BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + buffer, flags, pos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); + *outHasNext = BigramListReadWriteUtils::hasNext(flags); + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } +} + +void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } +} + +bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos, + int *outBigramsCount) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); if (usesAdditionalBuffer) { *fromPos -= mBuffer->getOriginalBufferSize(); } + *outBigramsCount = 0; BigramListReadWriteUtils::BigramFlags flags; do { // The buffer address can be changed after calling buffer writing methods. const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos); - int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, fromPos); - if (bigramPos == NOT_A_VALID_WORD_POS) { + if (originalBigramPos == NOT_A_DICT_POS) { // skip invalid bigram entry. continue; } if (usesAdditionalBuffer) { - bigramPos += mBuffer->getOriginalBufferSize(); + originalBigramPos += mBuffer->getOriginalBufferSize(); } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); BigramListReadWriteUtils::BigramFlags newBigramFlags; uint32_t newBigramOffset; int newBigramOffsetFieldSize; @@ -54,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo toPos)) { return false; } + (*outBigramsCount)++; } while(BigramListReadWriteUtils::hasNext(flags)); if (usesAdditionalBuffer) { *fromPos += mBuffer->getOriginalBufferSize(); @@ -61,8 +101,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo return true; } -bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int probability, - int *const pos) { +bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos, + const int probability, int *const pos) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); if (usesAdditionalBuffer) { *pos -= mBuffer->getOriginalBufferSize(); @@ -76,7 +116,17 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob // The buffer address can be changed after calling buffer writing methods. const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); - BigramListReadWriteUtils::getBigramAddressAndForwardPointer(buffer, flags, pos); + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + buffer, flags, pos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) { + // Update this bigram entry. + const BigramListReadWriteUtils::BigramFlags updatedFlags = + BigramListReadWriteUtils::setProbabilityInFlags(flags, probability); + return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos); + } if (BigramListReadWriteUtils::hasNext(flags)) { continue; } @@ -87,33 +137,35 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) { return false; } - // Then, add a new entry after the last entry. - BigramListReadWriteUtils::BigramFlags newBigramFlags; - uint32_t newBigramOffset; - int newBigramOffsetFieldSize; - if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( - *pos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags), - BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset, - &newBigramOffsetFieldSize)) { - continue; - } - int newEntryPos = *pos; if (usesAdditionalBuffer) { - newEntryPos += mBuffer->getOriginalBufferSize(); - } - // Write bigram flags. - if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, - &newEntryPos)) { - return false; - } - // Write bigram positon offset. - if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize, - &newEntryPos)) { - return false; + *pos += mBuffer->getOriginalBufferSize(); } + // Then, add a new entry after the last entry. + return writeNewBigramEntry(bigramPos, probability, pos); } while(BigramListReadWriteUtils::hasNext(flags)); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); + // We return directly from the while loop. + ASSERT(false); + return false; +} + +bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability, + int *const writingPos) { + BigramListReadWriteUtils::BigramFlags newBigramFlags; + uint32_t newBigramOffset; + int newBigramOffsetFieldSize; + if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( + *writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags, + &newBigramOffset, &newBigramOffsetFieldSize)) { + return false; + } + // Write bigram flags. + if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) { + return false; + } + // Write bigram positon offset. + if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize, + writingPos)) { + return false; } return true; } @@ -133,11 +185,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta if (usesAdditionalBuffer) { bigramOffsetFieldPos += mBuffer->getOriginalBufferSize(); } - int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, &pos); - if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) { - bigramPos += mBuffer->getOriginalBufferSize(); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); if (bigramPos != targetBigramPos) { continue; } @@ -152,4 +205,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta return false; } +int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( + const int originalBigramPos) const { + if (originalBigramPos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + int currentPos = originalBigramPos; + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); + nodeReader.fetchNodeInfoFromBuffer(currentPos); + int bigramLinkCount = 0; + while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { + currentPos = nodeReader.getBigramLinkedNodePos(); + nodeReader.fetchNodeInfoFromBuffer(currentPos); + bigramLinkCount++; + if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) { + AKLOGI("Bigram link is invalid. start position: %d", bigramPos); + ASSERT(false); + return NOT_A_DICT_POS; + } + } + return currentPos; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index b7c05376d..dafb62d80 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,7 +21,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -31,49 +33,26 @@ namespace latinime { */ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { public: - DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer) - : mBuffer(buffer) {} + DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy) + : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {} ~DynamicBigramListPolicy() {} void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, - int *const pos) const { - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); - const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - *pos -= mBuffer->getOriginalBufferSize(); - } - const BigramListReadWriteUtils::BigramFlags flags = - BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); - *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( - buffer, flags, pos); - if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) { - *outBigramPos += mBuffer->getOriginalBufferSize(); - } - *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); - *outHasNext = BigramListReadWriteUtils::hasNext(flags); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); - } - } - - void skipAllBigrams(int *const pos) const { - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); - const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - *pos -= mBuffer->getOriginalBufferSize(); - } - BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); - } - } + int *const pos) const; + + void skipAllBigrams(int *const pos) const; // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these - // positions after bigram lists. This method skips invalid bigram entries. - bool copyAllBigrams(int *const fromPos, int *const toPos); + // positions after bigram lists. This method skips invalid bigram entries and write the valid + // bigram entry count to outBigramsCount. + bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount); + + bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos); - bool addBigramEntry(const int bigramPos, const int probability, int *const pos); + bool writeNewBigramEntry(const int bigramPos, const int probability, + int *const writingPos); // Return if targetBigramPos is found or not. bool removeBigram(const int bigramListPos, const int targetBigramPos); @@ -81,7 +60,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); + static const int BIGRAM_LINK_COUNT_LIMIT; + BufferWithExtendableBuffer *const mBuffer; + const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; + + // Follow bigram link and return the position of bigram target PtNode that is currently valid. + int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp index 434858d67..ff80dd2f6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -27,12 +27,12 @@ namespace latinime { /* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructureWithBufferPolicy(const char *const path, const int pathLength, - const int bufOffset, const int size, const bool isUpdatable) { + ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset, + const int size, const bool isUpdatable) { // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of // impl classes of DictionaryStructureWithBufferPolicy. - const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, pathLength, bufOffset, - size, isUpdatable); + const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size, + isUpdatable); if (!mmapedBuffer) { return 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h index 1cb7a89c4..8cebc3b16 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -27,8 +27,7 @@ namespace latinime { class DictionaryStructureWithBufferPolicyFactory { public: static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy( - const char *const path, const int pathLength, const int bufOffset, const int size, - const bool isUpdatable); + const char *const path, const int bufOffset, const int size, const bool isUpdatable); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 5674cb48e..737098423 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -25,9 +25,17 @@ namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { + if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) { + AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d", + nodePos, mBuffer->getTailPosition()); + ASSERT(false); + invalidatePtNodeInfo(); + return; + } const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); int pos = nodePos; + mHeadPos = nodePos; if (usesAdditionalBuffer) { pos -= mBuffer->getOriginalBufferSize(); } @@ -57,10 +65,17 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c mChildrenPosFieldPos += mBuffer->getOriginalBufferSize(); } mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictBuf, mFlags, &pos); + dictBuf, &pos); if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mBuffer->getOriginalBufferSize(); } + if (mSiblingPos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + mBigramLinkedNodePos = mChildrenPos; + } else { + mBigramLinkedNodePos = NOT_A_DICT_POS; + } + } if (usesAdditionalBuffer) { pos += mBuffer->getOriginalBufferSize(); } @@ -77,7 +92,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c mBigramPos = NOT_A_DICT_POS; } // Update siblingPos if needed. - if (mSiblingPos == NOT_A_VALID_WORD_POS) { + if (mSiblingPos == NOT_A_DICT_POS) { // Sibling position is the tail position of current node. mSiblingPos = pos; } @@ -88,4 +103,19 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c } } +void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() { + mHeadPos = NOT_A_DICT_POS; + mFlags = 0; + mParentPos = NOT_A_DICT_POS; + mCodePointCount = 0; + mProbabilityFieldPos = NOT_A_DICT_POS; + mProbability = NOT_A_PROBABILITY; + mChildrenPosFieldPos = NOT_A_DICT_POS; + mChildrenPos = NOT_A_DICT_POS; + mBigramLinkedNodePos = NOT_A_DICT_POS; + mShortcutPos = NOT_A_DICT_POS; + mBigramPos = NOT_A_DICT_POS; + mSiblingPos = NOT_A_DICT_POS; +} + } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 2ee7c2495..6ef5f5813 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -39,12 +39,12 @@ class DynamicPatriciaTrieNodeReader { const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), - mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), - mParentPos(NOT_A_DICT_POS), mCodePointCount(0), - mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), - mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_VALID_WORD_POS) {} + mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), + mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mSiblingPos(NOT_A_DICT_POS) {} ~DynamicPatriciaTrieNodeReader() {} @@ -56,13 +56,14 @@ class DynamicPatriciaTrieNodeReader { AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - mNodePos = nodePos; - mSiblingPos = NOT_A_VALID_WORD_POS; - fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints); + mSiblingPos = NOT_A_DICT_POS; + mBigramLinkedNodePos = NOT_A_DICT_POS; + fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints); } - AK_FORCE_INLINE int getNodePos() const { - return mNodePos; + // HeadPos is different from NodePos when the current PtNode is a moved PtNode. + AK_FORCE_INLINE int getHeadPos() const { + return mHeadPos; } // Flags @@ -114,6 +115,11 @@ class DynamicPatriciaTrieNodeReader { return mChildrenPos; } + // Bigram linked node position. + AK_FORCE_INLINE int getBigramLinkedNodePos() const { + return mBigramLinkedNodePos; + } + // Shortcutlist position AK_FORCE_INLINE int getShortcutPos() const { return mShortcutPos; @@ -135,7 +141,7 @@ class DynamicPatriciaTrieNodeReader { const BufferWithExtendableBuffer *const mBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - int mNodePos; + int mHeadPos; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; int mParentPos; uint8_t mCodePointCount; @@ -143,12 +149,15 @@ class DynamicPatriciaTrieNodeReader { int mProbability; int mChildrenPosFieldPos; int mChildrenPos; + int mBigramLinkedNodePos; int mShortcutPos; int mBigramPos; int mSiblingPos; void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints); + + void invalidatePtNodeInfo(); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 945677b50..3cfbfd85b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { - childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(), + childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), nodeReader->getChildrenPos(), nodeReader->getProbability(), nodeReader->isTerminal() && !nodeReader->isDeleted(), nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), @@ -116,23 +116,23 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in if (!readingHelper.isMatchedCodePoint( j, searchCodePoints[matchedCodePointCount + j])) { // Different code point is found. The given word is not included in the dictionary. - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } } // All characters are matched. if (length == readingHelper.getTotalCodePointCount()) { // Terminal position is found. - return nodeReader->getNodePos(); + return nodeReader->getHeadPos(); } if (!nodeReader->hasChildren()) { - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } // Advance to the children nodes. readingHelper.readChildNode(); } // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, @@ -149,7 +149,7 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, } int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, @@ -162,7 +162,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) } int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, @@ -175,7 +175,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons } int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, @@ -209,12 +209,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_VALID_WORD_POS) { + if (word0Pos == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_VALID_WORD_POS) { + if (word1Pos == NOT_A_DICT_POS) { return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, @@ -230,12 +230,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_VALID_WORD_POS) { + if (word0Pos == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_VALID_WORD_POS) { + if (word1Pos == NOT_A_DICT_POS) { return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, @@ -243,4 +243,29 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const return writingHelper.removeBigramWords(word0Pos, word1Pos); } +void DynamicPatriciaTriePolicy::flush(const char *const filePath) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: flush() is called for non-updatable dictionary."); + return; + } + // TODO: Implement. +} + +void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); + return; + } + // TODO: Implement. +} + +bool DynamicPatriciaTriePolicy::needsToRunGC() const { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); + return false; + } + // TODO: Implement. + return false; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index cdab0e16a..2cbb0ff3b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(&mBufferWithExtendableBuffer), - mShortcutListPolicy(&mBufferWithExtendableBuffer) {} + mShortcutListPolicy(&mBufferWithExtendableBuffer), + mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -85,14 +85,20 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC() const; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mBufferWithExtendableBuffer; - DynamicBigramListPolicy mBigramListPolicy; DynamicShortcutListPolicy mShortcutListPolicy; + DynamicBigramListPolicy mBigramListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index db1c392bb..120fd7699 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -72,8 +72,7 @@ class DynamicPatriciaTrieReadingHelper { // Initialize reading state with the head position of a node. AK_FORCE_INLINE void initWithNodePos(const int nodePos) { - // TODO: Consolidate NOT_A_VALID_WORD_POS and NOT_A_DICT_POS - if (nodePos == NOT_A_VALID_WORD_POS || nodePos == NOT_A_DICT_POS) { + if (nodePos == NOT_A_DICT_POS) { mPos = NOT_A_DICT_POS; } else { mIsError = false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index 5d979fa51..8428c0b15 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -28,19 +28,26 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80; +/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer, + const int pos) { + int linkAddressPos = pos; + return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); +} + +/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); +} + /* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition( - const uint8_t *const buffer, const NodeFlags flags, int *const pos) { - if ((flags & MASK_MOVED) == FLAG_IS_NOT_MOVED) { - const int base = *pos; - const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); - if (offset == 0) { - // 0 offset means that the node does not have children. - return NOT_A_DICT_POS; - } else { - return base + offset; - } - } else { + const uint8_t *const buffer, int *const pos) { + const int base = *pos; + const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); + if (offset == 0) { + // 0 offset means that the node does not have children. return NOT_A_DICT_POS; + } else { + return base + offset; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index 62d73bb02..db5f9b1bd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -28,22 +27,15 @@ class DynamicPatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; - static AK_FORCE_INLINE int getForwardLinkPosition(const uint8_t *const buffer, const int pos) { - int linkAddressPos = pos; - return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); - } + static int getForwardLinkPosition(const uint8_t *const buffer, const int pos); static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) { return forwardLinkAddress != 0; } - static AK_FORCE_INLINE int getParentPosAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); - } + static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos); - static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, - const NodeFlags flags, int *const pos); + static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos); /** * Node Flags @@ -59,9 +51,9 @@ class DynamicPatriciaTrieReadingUtils { static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags, const bool isMoved, const bool isDeleted) { NodeFlags flags = originalFlags; - flags = isMoved ? ((flags & (!MASK_MOVED)) | FLAG_IS_MOVED) : flags; - flags = isDeleted ? ((flags & (!MASK_MOVED)) | FLAG_IS_DELETED) : flags; - flags = (!isMoved && !isDeleted) ? ((flags & (!MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags; + flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags; + flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags; + flags = (!isMoved && !isDeleted) ? ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags; return flags; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 99a983f21..311d31e5d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -26,10 +26,12 @@ namespace latinime { +const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; + bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability) { - int parentPos = NOT_A_VALID_WORD_POS; + int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); if (!readingHelper->isMatchedCodePoint(0 /* index */, @@ -63,7 +65,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( codePointCount - readingHelper->getTotalCodePointCount()); } // Advance to the children nodes. - parentPos = nodeReader->getNodePos(); + parentPos = nodeReader->getHeadPos(); readingHelper->readChildNode(); } if (readingHelper->isError()) { @@ -79,29 +81,60 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, const int probability) { + int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoFromBuffer(word0Pos); - if (nodeReader.isDeleted()) { + nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, + mMergedNodeCodePoints); + // Move node to add bigram entry. + const int newNodePos = mBuffer->getTailPosition(); + if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { return false; } - // TODO: Implement. - return false; + int writingPos = newNodePos; + // Write a new PtNode using original PtNode's info to the tail of the dictionary. + if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(), + mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), + &writingPos)) { + return false; + } + nodeReader.fetchNodeInfoFromBuffer(newNodePos); + if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { + // Insert a new bigram entry into the existing bigram list. + int bigramListPos = nodeReader.getBigramsPos(); + return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos); + } else { + // The PtNode doesn't have a bigram list. + // First, Write a bigram entry at the tail position of the PtNode. + if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { + return false; + } + // Then, Mark as the PtNode having bigram list in the flags. + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), + nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, + nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + writingPos = newNodePos; + // Write updated flags into the moved PtNode's flags field. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); + } } // Remove a bigram relation from word0Pos to word1Pos. bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoFromBuffer(word0Pos); - if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) { + if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { return false; } - // TODO: Implement. - return false; + return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); } bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) { - int pos = originalNode->getNodePos(); + const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, + const int bigramLinkedNodePos) { + int pos = originalNode->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -113,18 +146,42 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, false /* isDeleted */); - int writingPos = originalNode->getNodePos(); + int writingPos = originalNode->getHeadPos(); // Update flags. if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos)) { return false; } // Update moved position, which is stored in the parent offset field. - const int movedPosOffset = movedPos - originalNode->getNodePos(); + const int movedPosOffset = movedPos - originalNode->getHeadPos(); if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( mBuffer, movedPosOffset, &writingPos)) { return false; } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } + if (originalNode->hasChildren()) { + // Update children's parent position. + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); + const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); + readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos()); + while (!readingHelper.isEnd()) { + const int childPtNodeWrittenPos = nodeReader->getHeadPos(); + const int parentOffset = movedPos - childPtNodeWrittenPos; + int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */; + if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( + mBuffer, parentOffset, &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. + return false; + } + readingHelper.readNextSiblingNode(); + } + } return true; } @@ -135,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo const int originalBigramListPos, const int originalShortcutListPos, int *const writingPos) { const int nodePos = *writingPos; - // Create node flags and write them. - const PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, - probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS, - originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1, - 3 /* childrenPositionFieldSize */); - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */, writingPos)) { return false; } @@ -154,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo // Write code points if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints, codePointCount, writingPos)) { - return false;; + return false; } // Write probability when the probability is a valid probability, which means this node is // terminal. @@ -177,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo } } // Copy bigram list when the originalBigramListPos is valid dictionary position. + int bigramCount = 0; if (originalBigramListPos != NOT_A_DICT_POS) { int fromPos = originalBigramListPos; - if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) { + if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) { return false; } } + // Create node flags and write them. + PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, + probability != NOT_A_PROBABILITY /* isTerminal */, + originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */, + bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } return true; } @@ -230,7 +296,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } else { // Make the node terminal and write the probability. int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { + if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { return false; } if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(), @@ -250,7 +316,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( newPtNodeArrayPos, &childrenPosFieldPos)) { return false; } - return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints, + return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, codePointCount, probability); } @@ -287,11 +353,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; - const int firstPtNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) { - return false; - } - int writingPos = firstPtNodePos; + const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); + int writingPos = firstPartOfReallocatedPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; @@ -307,15 +370,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return false; } // Write the 2nd part of the reallocating node. - if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, - reallocatingPtNode->getNodePos(), + const int secondPartOfReallocatedPtNodePos = writingPos; + if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos, reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNode->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { - if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(), + if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos, newNodeCodePoints + overlappingCodePointCount, newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, &writingPos)) { @@ -326,9 +389,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } + // Update original reallocatingPtNode as moved. + if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, + secondPartOfReallocatedPtNodePos)) { + return false; + } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos); + nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos); // Update children position. int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index ada634a54..20e35abcf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -49,12 +49,14 @@ class DynamicPatriciaTrieWritingHelper { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); + static const int CHILDREN_POSITION_FIELD_SIZE; + BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, - const int movedPos); + const int movedPos, const int bigramLinkedNodePos); bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index d5a83a938..e6cff431b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -219,7 +219,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } // This function gets the position of the terminal node of the exact matching word in the -// dictionary. If no match is found, it returns NOT_A_VALID_WORD_POS. +// dictionary. If no match is found, it returns NOT_A_DICT_POS. int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); @@ -228,7 +228,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, while (true) { // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). - if (wordPos >= length) return NOT_A_VALID_WORD_POS; + if (wordPos >= length) return NOT_A_DICT_POS; int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch @@ -236,7 +236,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, while (true) { // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. - if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS; + if (0 >= ptNodeCount) return NOT_A_DICT_POS; const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); @@ -245,7 +245,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, if (character == wChar) { // This is the correct PtNode. Only one PtNode may start with the same char within // a PtNode array, so either we found our match in this array, or there is - // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the + // no match and we can return NOT_A_DICT_POS. So we will check all the // characters in this PtNode indeed does match. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, @@ -256,8 +256,8 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). - if (wordPos >= length) return NOT_A_VALID_WORD_POS; - if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS; + if (wordPos >= length) return NOT_A_DICT_POS; + if (inWord[wordPos] != character) return NOT_A_DICT_POS; character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &pos); } @@ -274,7 +274,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } // We have children and we are still shorter than the word we are searching for, so // we need to traverse children. Put the pointer on the children position, and @@ -320,7 +320,7 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability, } int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } int pos = nodePos; @@ -342,7 +342,7 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { } int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } int pos = nodePos; @@ -362,7 +362,7 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { } int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } int pos = nodePos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 75d976205..cee3e4ab2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -96,6 +96,22 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } + void flush(const char *const filePath) { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: flush() is called for non-updatable dictionary."); + } + + void flushWithGC(const char *const filePath) { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); + } + + bool needsToRunGC() const { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); + return false; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 576a158bc..1316b425f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -42,6 +42,63 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; // Flag for blacklist const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; +/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { + const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + if (firstByte < 0x80) { + return firstByte; + } else { + return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( + buffer, pos); + } +} + +/* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); +} + +/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); +} + +// Returns the number of read characters. +/* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer, + const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { + int length = 0; + if (hasMultipleChars(flags)) { + length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, + pos); + } else { + if (maxLength > 0) { + outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + length = 1; + } + } + return length; +} + +// Returns the number of skipped characters. +/* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const pos) { + if (hasMultipleChars(flags)) { + return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); + } else { + if (maxLength > 0) { + getCodePointAndAdvancePosition(buffer, pos); + return 1; + } else { + return 0; + } + } +} + +/* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); +} + /* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition( const uint8_t *const buffer, const NodeFlags flags, int *const pos) { const int base = *pos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h index 2b0646db2..8420ee95a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -28,62 +27,21 @@ class PatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; - static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition( - const uint8_t *const buffer, int *const pos) { - const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); - if (firstByte < 0x80) { - return firstByte; - } else { - return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( - buffer, pos); - } - } + static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos); - static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); - } + static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos); - static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); - } + static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos); // Returns the number of read characters. - static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer, - const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { - int length = 0; - if (hasMultipleChars(flags)) { - length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, - pos); - } else { - if (maxLength > 0) { - outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); - length = 1; - } - } - return length; - } + static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const outBuffer, int *const pos); // Returns the number of skipped characters. - static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, - const int maxLength, int *const pos) { - if (hasMultipleChars(flags)) { - return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); - } else { - if (maxLength > 0) { - getCodePointAndAdvancePosition(buffer, pos); - return 1; - } else { - return 0; - } - } - } + static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const pos); - static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); - } + static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos); static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, int *const pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp index e70bb5071..847dcdee5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + namespace latinime { // Flag for presence of more attributes @@ -28,4 +30,22 @@ const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; // The numeric value of the shortcut probability that means 'whitelist'. const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; +/* static */ ShortcutListReadingUtils::ShortcutFlags + ShortcutListReadingUtils::getFlagsAndForwardPointer(const uint8_t *const dictRoot, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); +} + +/* static */ int ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer( + const uint8_t *const dictRoot, int *const pos) { + // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. + return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos) + - SHORTCUT_LIST_SIZE_FIELD_SIZE; +} + +/* static */ int ShortcutListReadingUtils::readShortcutTarget( + const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) { + return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h index 5f4f240f5..a83ed5a50 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -28,10 +27,7 @@ class ShortcutListReadingUtils { public: typedef uint8_t ShortcutFlags; - static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer( - const uint8_t *const dictRoot, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); - } + static ShortcutFlags getFlagsAndForwardPointer(const uint8_t *const dictRoot, int *const pos); static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) { return flags & MASK_ATTRIBUTE_PROBABILITY; @@ -43,12 +39,7 @@ class ShortcutListReadingUtils { // This method returns the size of the shortcut list region excluding the shortcut list size // field at the beginning. - static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( - const uint8_t *const dictRoot, int *const pos) { - // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. - return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos) - - SHORTCUT_LIST_SIZE_FIELD_SIZE; - } + static int getShortcutListSizeAndForwardPointer(const uint8_t *const dictRoot, int *const pos); static AK_FORCE_INLINE int getShortcutListSizeFieldSize() { return SHORTCUT_LIST_SIZE_FIELD_SIZE; @@ -63,11 +54,8 @@ class ShortcutListReadingUtils { return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; } - static AK_FORCE_INLINE int readShortcutTarget( - const uint8_t *const dictRoot, const int maxLength, int *const outWord, - int *const pos) { - return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); - } + static int readShortcutTarget(const uint8_t *const dictRoot, const int maxLength, + int *const outWord, int *const pos); private: DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index dfdaebd18..0fed275e9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -76,7 +76,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int } } mUsedAdditionalBufferSize += size; - } else if (pos + size >= tailPosition) { + } else if (pos + size > tailPosition) { // The access will beyond the tail of used region. return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp index a84cfb9d5..1833e8832 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp @@ -18,7 +18,8 @@ namespace latinime { -const uint8_t ByteArrayUtils::MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; +const uint8_t ByteArrayUtils::MINIMUM_ONE_BYTE_CHARACTER_VALUE = 0x20; +const uint8_t ByteArrayUtils::MAXIMUM_ONE_BYTE_CHARACTER_VALUE = 0xFF; const uint8_t ByteArrayUtils::CHARACTER_ARRAY_TERMINATOR = 0x1F; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index f727ecf8e..0c1576818 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -135,7 +135,7 @@ class ByteArrayUtils { static AK_FORCE_INLINE int readCodePointAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint8_t firstByte = readUint8(buffer, *pos); - if (firstByte < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { + if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) { if (firstByte == CHARACTER_ARRAY_TERMINATOR) { *pos += 1; return NOT_A_CODE_POINT; @@ -172,6 +172,7 @@ class ByteArrayUtils { int codePoint = readCodePointAndAdvancePosition(buffer, pos); while (NOT_A_CODE_POINT != codePoint && length < maxLength) { codePoint = readCodePointAndAdvancePosition(buffer, pos); + length++; } return length; } @@ -186,7 +187,8 @@ class ByteArrayUtils { const int codePoint = codePoints[i]; if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { break; - } else if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { + } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE + || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { // three bytes character. writeUint24AndAdvancePosition(buffer, codePoint, pos); } else { @@ -206,7 +208,8 @@ class ByteArrayUtils { const int codePoint = codePoints[i]; if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { break; - } else if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { + } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE + || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) { // three bytes character. byteCount += 3; } else { @@ -224,7 +227,8 @@ class ByteArrayUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); - static const uint8_t MINIMAL_ONE_BYTE_CHARACTER_VALUE; + static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE; + static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE; static const uint8_t CHARACTER_ARRAY_TERMINATOR; static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 6febd7832..6b69116eb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -29,8 +29,8 @@ namespace latinime { class MmappedBuffer { public: - static MmappedBuffer* openBuffer(const char *const path, const int pathLength, - const int bufferOffset, const int bufferSize, const bool isUpdatable) { + static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset, + const int bufferSize, const bool isUpdatable) { const int openMode = isUpdatable ? O_RDWR : O_RDONLY; const int mmapFd = open(path, openMode); if (mmapFd < 0) { |