diff options
Diffstat (limited to 'native/jni/src')
23 files changed, 192 insertions, 139 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 4605890c7..89dfa39b3 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -292,7 +292,6 @@ static inline void prof_out(void) { // of the binary dictionary where a {key,value} string pair scheme is used. #define LARGEST_INT_DIGIT_COUNT 11 -#define NOT_A_VALID_WORD_POS (-99) #define NOT_A_CODE_POINT (-1) #define NOT_A_DISTANCE (-1) #define NOT_A_COORDINATE (-1) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index cdd9f59aa..377015371 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -112,7 +112,7 @@ class DicNode { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -125,7 +125,7 @@ class DicNode { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -234,7 +234,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_VALID_WORD_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index e81591992..ec65114c7 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -89,7 +89,7 @@ namespace latinime { const int unigramProbability = node->getProbability(); const int wordPos = node->getPos(); const int prevWordPos = node->getPrevWordPos(); - if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) { + if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index 9bc96877e..b7af97018 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -29,7 +29,7 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_VALID_WORD_POS) { + mPrevWordNodePos(NOT_A_DICT_POS) { memset(mPrevWord, 0, sizeof(mPrevWord)); memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); } @@ -41,7 +41,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = NOT_A_VALID_WORD_POS; + mPrevWordNodePos = NOT_A_DICT_POS; memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions)); } diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index cf1cd8815..425b07624 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -116,7 +116,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) { + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { continue; } const int codePointCount = mDictionaryStructurePolicy-> @@ -146,7 +146,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in if (0 >= prevWordLength) return NOT_A_DICT_POS; int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); - if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; + if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); } @@ -157,7 +157,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w if (NOT_A_DICT_POS == pos) return false; int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (NOT_A_VALID_WORD_POS == nextWordPos) return false; + if (NOT_A_DICT_POS == nextWordPos) return false; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 02ece639c..29fe7ab94 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -87,7 +87,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq int Dictionary::getProbability(const int *word, int length) const { int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); - if (NOT_A_VALID_WORD_POS == pos) { + if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; } return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 9efe5f6f9..aecf41386 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -73,7 +73,7 @@ class MultiBigramMap { bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) { + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { continue; } mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 2c2259214..50f2bbd8d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,13 +35,13 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPos = NOT_A_VALID_WORD_POS; + mPrevWordPos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPos == NOT_A_VALID_WORD_POS) { + if (mPrevWordPos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index fe8893590..e2ef5fc76 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -59,7 +59,7 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPos(NOT_A_VALID_WORD_POS), mProximityInfo(0), + : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index d57547445..09e832f07 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -38,6 +38,22 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; +/* static */ BigramListReadWriteUtils::BigramFlags + BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); +} + +/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, + int *const pos) { + BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos); + while (hasNext(flags)) { + *pos += attributeAddressSize(flags); + flags = getFlagsAndForwardPointer(bigramsBuf, pos); + } + *pos += attributeAddressSize(flags); +} + /* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer( const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) { int offset = 0; @@ -54,7 +70,7 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; break; } if (offset == 0) { - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } if (isOffsetNegative(flags)) { return origin - offset; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index ee2b722a4..9a930747c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -21,7 +21,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -29,10 +28,7 @@ class BigramListReadWriteUtils { public: typedef uint8_t BigramFlags; - static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer( - const uint8_t *const bigramsBuf, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); - } + static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos); static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { return flags & MASK_ATTRIBUTE_PROBABILITY; @@ -43,15 +39,7 @@ public: } // Bigrams reading methods - static AK_FORCE_INLINE void skipExistingBigrams(const uint8_t *const bigramsBuf, - int *const pos) { - BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos); - while (hasNext(flags)) { - *pos += attributeAddressSize(flags); - flags = getFlagsAndForwardPointer(bigramsBuf, pos); - } - *pos += attributeAddressSize(flags); - } + static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos); static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos); @@ -79,7 +67,7 @@ public: const int entryPos, const int targetPos, const int probability, const bool hasNext, BigramFlags *const outBigramFlags, uint32_t *const outOffset, int *const outOffsetFieldSize) { - if (targetPos == NOT_A_VALID_WORD_POS) { + if (targetPos == NOT_A_DICT_POS) { return false; } BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 4ee138125..ca3b64da1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -31,7 +31,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, pos); - if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { originalBigramPos += mBuffer->getOriginalBufferSize(); } *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); @@ -66,7 +66,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos); int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, fromPos); - if (originalBigramPos == NOT_A_VALID_WORD_POS) { + if (originalBigramPos == NOT_A_DICT_POS) { // skip invalid bigram entry. continue; } @@ -172,7 +172,7 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta } int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( buffer, flags, &pos); - if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { originalBigramPos += mBuffer->getOriginalBufferSize(); } const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); @@ -192,8 +192,8 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( const int originalBigramPos) const { - if (originalBigramPos == NOT_A_VALID_WORD_POS) { - return NOT_A_VALID_WORD_POS; + if (originalBigramPos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; } int currentPos = originalBigramPos; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); @@ -206,7 +206,7 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) { AKLOGI("Bigram link is invalid. start position: %d", bigramPos); ASSERT(false); - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } } return currentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 56ef60ae4..e455080d7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -25,6 +25,13 @@ namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { + if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) { + AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d", + nodePos, mBuffer->getTailPosition()); + ASSERT(false); + invalidatePtNodeInfo(); + return; + } const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); int pos = nodePos; @@ -62,7 +69,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mBuffer->getOriginalBufferSize(); } - if (mSiblingPos == NOT_A_VALID_WORD_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + if (mSiblingPos == NOT_A_DICT_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { mBigramLinkedNodePos = mChildrenPos; } else { mBigramLinkedNodePos = NOT_A_DICT_POS; @@ -83,7 +90,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c mBigramPos = NOT_A_DICT_POS; } // Update siblingPos if needed. - if (mSiblingPos == NOT_A_VALID_WORD_POS) { + if (mSiblingPos == NOT_A_DICT_POS) { // Sibling position is the tail position of current node. mSiblingPos = pos; } @@ -94,4 +101,19 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c } } +void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() { + mHeadPos = NOT_A_DICT_POS; + mFlags = 0; + mParentPos = NOT_A_DICT_POS; + mCodePointCount = 0; + mProbabilityFieldPos = NOT_A_DICT_POS; + mProbability = NOT_A_PROBABILITY; + mChildrenPosFieldPos = NOT_A_DICT_POS; + mChildrenPos = NOT_A_DICT_POS; + mBigramLinkedNodePos = NOT_A_DICT_POS; + mShortcutPos = NOT_A_DICT_POS; + mBigramPos = NOT_A_DICT_POS; + mSiblingPos = NOT_A_DICT_POS; +} + } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 89d38a590..6ef5f5813 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -39,12 +39,12 @@ class DynamicPatriciaTrieNodeReader { const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), - mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_VALID_WORD_POS), mFlags(0), + mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_VALID_WORD_POS) {} + mSiblingPos(NOT_A_DICT_POS) {} ~DynamicPatriciaTrieNodeReader() {} @@ -56,7 +56,7 @@ class DynamicPatriciaTrieNodeReader { AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - mSiblingPos = NOT_A_VALID_WORD_POS; + mSiblingPos = NOT_A_DICT_POS; mBigramLinkedNodePos = NOT_A_DICT_POS; fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints); } @@ -156,6 +156,8 @@ class DynamicPatriciaTrieNodeReader { void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints); + + void invalidatePtNodeInfo(); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index ece1781f1..cccc09041 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -116,7 +116,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in if (!readingHelper.isMatchedCodePoint( j, searchCodePoints[matchedCodePointCount + j])) { // Different code point is found. The given word is not included in the dictionary. - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } } // All characters are matched. @@ -125,14 +125,14 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in return nodeReader->getHeadPos(); } if (!nodeReader->hasChildren()) { - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } // Advance to the children nodes. readingHelper.readChildNode(); } // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, @@ -149,7 +149,7 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, } int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, @@ -162,7 +162,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) } int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, @@ -175,7 +175,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons } int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, @@ -209,12 +209,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_VALID_WORD_POS) { + if (word0Pos == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_VALID_WORD_POS) { + if (word1Pos == NOT_A_DICT_POS) { return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, @@ -230,12 +230,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_VALID_WORD_POS) { + if (word0Pos == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_VALID_WORD_POS) { + if (word1Pos == NOT_A_DICT_POS) { return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index db1c392bb..120fd7699 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -72,8 +72,7 @@ class DynamicPatriciaTrieReadingHelper { // Initialize reading state with the head position of a node. AK_FORCE_INLINE void initWithNodePos(const int nodePos) { - // TODO: Consolidate NOT_A_VALID_WORD_POS and NOT_A_DICT_POS - if (nodePos == NOT_A_VALID_WORD_POS || nodePos == NOT_A_DICT_POS) { + if (nodePos == NOT_A_DICT_POS) { mPos = NOT_A_DICT_POS; } else { mIsError = false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index c7e89fff8..8428c0b15 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -28,6 +28,17 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80; +/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer, + const int pos) { + int linkAddressPos = pos; + return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); +} + +/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); +} + /* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const int base = *pos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index 5a2ad9cb9..db5f9b1bd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -28,19 +27,13 @@ class DynamicPatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; - static AK_FORCE_INLINE int getForwardLinkPosition(const uint8_t *const buffer, const int pos) { - int linkAddressPos = pos; - return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); - } + static int getForwardLinkPosition(const uint8_t *const buffer, const int pos); static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) { return forwardLinkAddress != 0; } - static AK_FORCE_INLINE int getParentPosAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); - } + static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos); static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index d5a83a938..e6cff431b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -219,7 +219,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } // This function gets the position of the terminal node of the exact matching word in the -// dictionary. If no match is found, it returns NOT_A_VALID_WORD_POS. +// dictionary. If no match is found, it returns NOT_A_DICT_POS. int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); @@ -228,7 +228,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, while (true) { // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). - if (wordPos >= length) return NOT_A_VALID_WORD_POS; + if (wordPos >= length) return NOT_A_DICT_POS; int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch @@ -236,7 +236,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, while (true) { // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. - if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS; + if (0 >= ptNodeCount) return NOT_A_DICT_POS; const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); @@ -245,7 +245,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, if (character == wChar) { // This is the correct PtNode. Only one PtNode may start with the same char within // a PtNode array, so either we found our match in this array, or there is - // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the + // no match and we can return NOT_A_DICT_POS. So we will check all the // characters in this PtNode indeed does match. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, @@ -256,8 +256,8 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). - if (wordPos >= length) return NOT_A_VALID_WORD_POS; - if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS; + if (wordPos >= length) return NOT_A_DICT_POS; + if (inWord[wordPos] != character) return NOT_A_DICT_POS; character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &pos); } @@ -274,7 +274,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - return NOT_A_VALID_WORD_POS; + return NOT_A_DICT_POS; } // We have children and we are still shorter than the word we are searching for, so // we need to traverse children. Put the pointer on the children position, and @@ -320,7 +320,7 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability, } int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } int pos = nodePos; @@ -342,7 +342,7 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { } int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } int pos = nodePos; @@ -362,7 +362,7 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { } int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { - if (nodePos == NOT_A_VALID_WORD_POS) { + if (nodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } int pos = nodePos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 576a158bc..1316b425f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -42,6 +42,63 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; // Flag for blacklist const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; +/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { + const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + if (firstByte < 0x80) { + return firstByte; + } else { + return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( + buffer, pos); + } +} + +/* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); +} + +/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); +} + +// Returns the number of read characters. +/* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer, + const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { + int length = 0; + if (hasMultipleChars(flags)) { + length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, + pos); + } else { + if (maxLength > 0) { + outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + length = 1; + } + } + return length; +} + +// Returns the number of skipped characters. +/* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const pos) { + if (hasMultipleChars(flags)) { + return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); + } else { + if (maxLength > 0) { + getCodePointAndAdvancePosition(buffer, pos); + return 1; + } else { + return 0; + } + } +} + +/* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); +} + /* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition( const uint8_t *const buffer, const NodeFlags flags, int *const pos) { const int base = *pos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h index 2b0646db2..8420ee95a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -28,62 +27,21 @@ class PatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; - static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition( - const uint8_t *const buffer, int *const pos) { - const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); - if (firstByte < 0x80) { - return firstByte; - } else { - return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( - buffer, pos); - } - } + static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos); - static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); - } + static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos); - static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); - } + static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos); // Returns the number of read characters. - static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer, - const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { - int length = 0; - if (hasMultipleChars(flags)) { - length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, - pos); - } else { - if (maxLength > 0) { - outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); - length = 1; - } - } - return length; - } + static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const outBuffer, int *const pos); // Returns the number of skipped characters. - static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, - const int maxLength, int *const pos) { - if (hasMultipleChars(flags)) { - return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); - } else { - if (maxLength > 0) { - getCodePointAndAdvancePosition(buffer, pos); - return 1; - } else { - return 0; - } - } - } + static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const pos); - static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); - } + static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos); static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, int *const pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp index e70bb5071..847dcdee5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + namespace latinime { // Flag for presence of more attributes @@ -28,4 +30,22 @@ const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; // The numeric value of the shortcut probability that means 'whitelist'. const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; +/* static */ ShortcutListReadingUtils::ShortcutFlags + ShortcutListReadingUtils::getFlagsAndForwardPointer(const uint8_t *const dictRoot, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); +} + +/* static */ int ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer( + const uint8_t *const dictRoot, int *const pos) { + // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. + return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos) + - SHORTCUT_LIST_SIZE_FIELD_SIZE; +} + +/* static */ int ShortcutListReadingUtils::readShortcutTarget( + const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) { + return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h index 5f4f240f5..a83ed5a50 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -28,10 +27,7 @@ class ShortcutListReadingUtils { public: typedef uint8_t ShortcutFlags; - static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer( - const uint8_t *const dictRoot, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); - } + static ShortcutFlags getFlagsAndForwardPointer(const uint8_t *const dictRoot, int *const pos); static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) { return flags & MASK_ATTRIBUTE_PROBABILITY; @@ -43,12 +39,7 @@ class ShortcutListReadingUtils { // This method returns the size of the shortcut list region excluding the shortcut list size // field at the beginning. - static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( - const uint8_t *const dictRoot, int *const pos) { - // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. - return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos) - - SHORTCUT_LIST_SIZE_FIELD_SIZE; - } + static int getShortcutListSizeAndForwardPointer(const uint8_t *const dictRoot, int *const pos); static AK_FORCE_INLINE int getShortcutListSizeFieldSize() { return SHORTCUT_LIST_SIZE_FIELD_SIZE; @@ -63,11 +54,8 @@ class ShortcutListReadingUtils { return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY; } - static AK_FORCE_INLINE int readShortcutTarget( - const uint8_t *const dictRoot, const int maxLength, int *const outWord, - int *const pos) { - return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos); - } + static int readShortcutTarget(const uint8_t *const dictRoot, const int maxLength, + int *const outWord, int *const pos); private: DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils); |