diff options
Diffstat (limited to 'native/jni/src')
4 files changed, 54 insertions, 22 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index faaf44162..e4847fcf9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -30,18 +30,19 @@ namespace latinime { class PtNodeParams { public: // Invalid PtNode. - PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), - mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), - mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS), - mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), - mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_DICT_POS) {} + PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(), + mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), + mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), + mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {} PtNodeParams(const PtNodeParams& ptNodeParams) : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags), - mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount), - mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos), + mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos), + mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(), + mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos), mTerminalId(ptNodeParams.mTerminalId), mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos), mProbability(ptNodeParams.mProbability), @@ -58,7 +59,7 @@ class PtNodeParams { const int codePointCount, const int *const codePoints, const int probability, const int childrenPos, const int shortcutPos, const int bigramPos, const int siblingPos) - : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS), + : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS), mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), @@ -73,7 +74,7 @@ class PtNodeParams { const int parentPos, const int codePointCount, const int *const codePoints, const int terminalIdFieldPos, const int terminalId, const int probability, const int childrenPosFieldPos, const int childrenPos, const int siblingPos) - : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), + : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId), mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), @@ -87,8 +88,8 @@ class PtNodeParams { PtNodeParams(const PtNodeParams *const ptNodeParams, const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) - : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos), - mCodePointCount(codePointCount), mCodePoints(), + : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true), + mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()), mTerminalId(ptNodeParams->getTerminalId()), mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()), @@ -104,7 +105,7 @@ class PtNodeParams { PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) - : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos), + : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), @@ -126,11 +127,11 @@ class PtNodeParams { // Flags AK_FORCE_INLINE bool isDeleted() const { - return DynamicPtReadingUtils::isDeleted(mFlags); + return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags); } AK_FORCE_INLINE bool willBecomeNonTerminal() const { - return DynamicPtReadingUtils::willBecomeNonTerminal(mFlags); + return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags); } AK_FORCE_INLINE bool hasChildren() const { @@ -224,6 +225,7 @@ class PtNodeParams { const int mHeadPos; const PatriciaTrieReadingUtils::NodeFlags mFlags; + const bool mHasMovedFlag; const int mParentPos; const uint8_t mCodePointCount; int mCodePoints[MAX_WORD_LENGTH]; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index bf38dffa5..fa5993090 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -363,4 +363,33 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin &bigrams, &shortcuts); } +int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { + if (token == 0) { + // Start iterating the dictionary. + mTerminalPtNodePositionsForIteratingWords.clear(); + DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( + &mTerminalPtNodePositionsForIteratingWords); + DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); + } + const int terminalPtNodePositionsVectorSize = + static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); + if (token < 0 || token >= terminalPtNodePositionsVectorSize) { + AKLOGE("Given token %d is invalid.", token); + return 0; + } + const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; + int unigramProbability = NOT_A_PROBABILITY; + getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, + outCodePoints, &unigramProbability); + const int nextToken = token + 1; + if (nextToken >= terminalPtNodePositionsVectorSize) { + // All words have been iterated. + mTerminalPtNodePositionsForIteratingWords.clear(); + return 0; + } + return nextToken; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index da4be87ce..8fbca2612 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -18,6 +18,7 @@ #define LATINIME_PATRICIA_TRIE_POLICY_H #include <stdint.h> +#include <vector> #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -44,7 +45,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), - mPtNodeArrayReader(mDictRoot, mDictBufferSize) {} + mPtNodeArrayReader(mDictRoot, mDictBufferSize), + mTerminalPtNodePositionsForIteratingWords() {} AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -130,10 +132,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints) { - // getNextWordAndNextToken is not supported. - return 0; - } + int getNextWordAndNextToken(const int token, int *const outCodePoints); private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); @@ -146,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const ShortcutListPolicy mShortcutListPolicy; const Ver2ParticiaTrieNodeReader mPtNodeReader; const Ver2PtNodeArrayReader mPtNodeArrayReader; + std::vector<int> mTerminalPtNodePositionsForIteratingWords; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp index d41fc8924..adc474b4c 100644 --- a/native/jni/src/utils/char_utils.cpp +++ b/native/jni/src/utils/char_utils.cpp @@ -1118,7 +1118,8 @@ static int compare_pair_capital(const void *a, const void *b) { /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, - /* U+0130 */ 0x0049, 0x0131, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B, + // U+0131: Manually changed from 0131 to 0049 + /* U+0130 */ 0x0049, 0x0049, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B, /* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, /* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E, // U+0141: Manually changed from 0141 to 004C |