diff options
Diffstat (limited to 'native/jni')
22 files changed, 305 insertions, 203 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index e14cf5a71..4786ef6c7 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -71,6 +71,7 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ + dictionary_structure_with_buffer_policy_factory.cpp \ dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_utils.cpp \ diff --git a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h index 1a39f2ef3..c7ab571de 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h @@ -20,11 +20,11 @@ #include "defines.h" #include "suggest/core/layout/proximity_info_state.h" #include "suggest/core/layout/proximity_info_utils.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { -class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter { +class DicNodeProximityFilter : public DictionaryStructureWithBufferPolicy::NodeFilter { public: DicNodeProximityFilter(const ProximityInfoState *const pInfoState, const int pointIndex, const bool exactOnly) diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 6b4ef2fea..ec70ed30b 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -24,7 +24,7 @@ #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/probability_utils.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" namespace latinime { @@ -83,7 +83,7 @@ namespace latinime { DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); } else { binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode, - binaryDictionaryInfo, &childrenFilter, childDicNodes); + &childrenFilter, childDicNodes); } } diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 3751ae500..d78493b45 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -116,9 +116,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng while (bigramsIt.hasNext()) { bigramsIt.next(); const int length = mBinaryDictionaryInfo->getStructurePolicy()-> - getCodePointsAndProbabilityAndReturnCodePointCount( - mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, - bigramBuffer, &unigramProbability); + getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), + MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the // unigram probability, the worse the precision. The theoritical maximum error in // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 @@ -139,10 +138,9 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch); + prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; - return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode( - mBinaryDictionaryInfo, pos); + return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, @@ -151,7 +149,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */); + word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp index 5d14a0554..0e8d72f2e 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp @@ -61,8 +61,7 @@ const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; if (ByteArrayUtils::readUint16(dict, 4) == 2) { return VERSION_2; } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { - // TODO: Support version 3 dictionary. - return UNKNOWN_VERSION; + return VERSION_3; } else { return UNKNOWN_VERSION; } diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index cbea18f90..c694c6a3a 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -23,7 +23,7 @@ #include "jni.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "utils/log_utils.h" namespace latinime { @@ -37,11 +37,16 @@ class BinaryDictionaryInfo { mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictBuf, mDictSize)), mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), - mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( - mDictionaryFormat)) { + // TODO: Remove. + mStructurePolicy(DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructurePolicy(this)) { logDictionaryInfo(env); } + ~BinaryDictionaryInfo() { + delete mStructurePolicy; + } + AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; } @@ -66,6 +71,7 @@ class BinaryDictionaryInfo { return mDictionaryFormat; } + // TODO: Move to DictionaryStructurePolicy. AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { return &mDictionaryHeader; } @@ -76,7 +82,8 @@ class BinaryDictionaryInfo { return mIsUpdatable && isUpdatableDictionaryFormat; } - AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const { + // TODO: remove + AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const { return mStructurePolicy; } @@ -89,9 +96,12 @@ class BinaryDictionaryInfo { const int mDictBufOffset; const bool mIsUpdatable; const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; + // TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the + // DictionaryStructurePolicy. const BinaryDictionaryHeader mDictionaryHeader; const uint8_t *const mDictRoot; - const DictionaryStructurePolicy *const mStructurePolicy; + // TODO: remove + const DictionaryStructureWithBufferPolicy *const mStructurePolicy; AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { const int BUFFER_SIZE = 16; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 4a9e38fe8..891b80331 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -83,14 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - const DictionaryStructurePolicy *const structurePolicy = + const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo.getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length, + int pos = structurePolicy->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == pos) { return NOT_A_PROBABILITY; } - return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos); + return structurePolicy->getUnigramProbability(pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index d5eafe1bf..085438008 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -68,7 +68,7 @@ class MultiBigramMap { void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) { const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> - getBigramsPositionOfNode(binaryDictionaryInfo, nodePos); + getBigramsPositionOfNode(nodePos); BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); @@ -108,7 +108,7 @@ class MultiBigramMap { const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int nextWordPosition, const int unigramProbability) { const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> - getBigramsPositionOfNode(binaryDictionaryInfo, nodePos); + getBigramsPositionOfNode(nodePos); BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index cc14c982c..dce4e741a 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -21,7 +21,6 @@ namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeVector; @@ -29,7 +28,7 @@ class DicNodeVector; * This class abstracts structure of dictionaries. * Implement this policy to support additional dictionaries. */ -class DictionaryStructurePolicy { +class DictionaryStructureWithBufferPolicy { public: // This provides a filtering method for filtering new node. class NodeFilter { @@ -44,36 +43,31 @@ class DictionaryStructurePolicy { DISALLOW_COPY_AND_ASSIGN(NodeFilter); }; + virtual ~DictionaryStructureWithBufferPolicy() {} + virtual int getRootPosition() const = 0; virtual void createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + virtual int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; - virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const = 0; + virtual int getUnigramProbability(const int nodePos) const = 0; - virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const = 0; + virtual int getShortcutPositionOfNode(const int nodePos) const = 0; - virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const = 0; + virtual int getBigramsPositionOfNode(const int nodePos) const = 0; protected: - DictionaryStructurePolicy() {} - virtual ~DictionaryStructurePolicy() {} + DictionaryStructureWithBufferPolicy() {} private: - DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy); + DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy); }; } // namespace latinime #endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 7651b19a0..11a147bda 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -37,12 +37,12 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */); + prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (mPrevWordPos == NOT_A_VALID_WORD_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */); + prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index de57e041a..5c4cef02d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -75,7 +75,7 @@ class DicTraverseSession { const int maxPointerCount); void resetCache(const int nextActiveCacheSize, const int maxWords); - // TODO: Remove + // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; //-------------------- diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 9376d7b93..f28efd526 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -215,7 +215,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen traverseSession->getBinaryDictionaryInfo(); const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode( - binaryDictionaryInfo, terminalDicNode->getPos())); + terminalDicNode->getPos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h deleted file mode 100644 index c0df89f49..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H -#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" - -namespace latinime { - -class DictionaryStructurePolicy; - -class DictionaryStructurePolicyFactory { - public: - static const DictionaryStructurePolicy *getDictionaryStructurePolicy( - const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { - switch (dictionaryFormat) { - case BinaryDictionaryFormatUtils::VERSION_2: - return PatriciaTriePolicy::getInstance(); - case BinaryDictionaryFormatUtils::VERSION_3: - return DynamicPatriciaTriePolicy::getInstance(); - default: - ASSERT(false); - return 0; - } - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory); -}; -} // namespace latinime -#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp new file mode 100644 index 000000000..f2c586245 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" + +namespace latinime { + +/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructurePolicy( + const BinaryDictionaryInfo *const binaryDictionaryInfo) { + switch (binaryDictionaryInfo->getFormat()) { + case BinaryDictionaryFormatUtils::VERSION_2: + return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(), + binaryDictionaryInfo); + case BinaryDictionaryFormatUtils::VERSION_3: + return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(), + binaryDictionaryInfo); + default: + ASSERT(false); + return 0; + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h new file mode 100644 index 000000000..95f82aabe --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H +#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H + +#include "defines.h" + +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" + +namespace latinime { + +class BinaryDictionaryInfo; + +class DictionaryStructureWithBufferPolicyFactory { + public: + static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy( + const BinaryDictionaryInfo *const binaryDictionaryInfo); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); +}; +} // namespace latinime +#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 20cda91a3..7ac635a00 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -27,7 +27,9 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c const uint8_t *const dictRoot = mBinaryDictionaryInfo->getDictRoot(); int pos = nodePos; mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); - mParentPos = DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos); + const int parentPos = + DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos); + mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index b668aab78..71558edaa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -42,7 +42,7 @@ class DynamicPatriciaTrieNodeReader { // Reads node information from dictionary buffer and updates members with the information. AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) { - fetchNodeInfoFromBufferAndGetNodeCodePoints(mNodePos , 0 /* maxCodePointCount */, + fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos , 0 /* maxCodePointCount */, 0 /* outCodePoints */); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 9a180e6f7..bb49bb15c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -26,25 +26,34 @@ namespace latinime { -const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance; +// To avoid infinite loop caused by invalid or malicious forward links. +const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); + int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), &nextPos); + mDictRoot, &nextPos); + totalChildCount += childCount; + if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { + // Invalid dictionary. + AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d", + childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP); + ASSERT(false); + return; + } for (int i = 0; i < childCount; i++) { nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nextPos, MAX_WORD_LENGTH, mergedNodeCodePoints); if (!nodeReader.isDeleted() && !nodeFilter->isFilteredOut(mergedNodeCodePoints[0])) { - // Push child note when the node is not deleted and not filtered out. + // Push child node when the node is not deleted and not filtered out. childDicNodes->pushLeavingChild(dicNode, nodeReader.getNodePos(), nodeReader.getChildrenPos(), nodeReader.getProbability(), nodeReader.isTerminal(), nodeReader.hasChildren(), @@ -53,22 +62,24 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d } nextPos = nodeReader.getSiblingNodePos(); } - nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( - binaryDictionaryInfo->getDictRoot(), nextPos); - } while(DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos)); + nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos); + } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos)); } int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { + if (nodePos == NOT_A_VALID_WORD_POS) { + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } // This method traverses parent nodes from the terminal by following parent pointers; thus, // node code points are stored in the buffer in the reverse order. int reverseCodePoints[maxCodePointCount]; int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); @@ -79,7 +90,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i]; } // Then, follow parent pos toward the root node. - while (nodeReader.getParentPos() != getRootPosition()) { + while (nodeReader.getParentPos() != NOT_A_DICT_POS) { // codePointCount must be incremented at least once in each iteration to ensure preventing // infinite loop. if (nodeReader.isDeleted() || codePointCount > maxCodePointCount @@ -103,16 +114,87 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun return codePointCount; } -int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, +int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - // TODO: Implement. - return NOT_A_DICT_POS; + int searchCodePoints[length]; + for (int i = 0; i < length; ++i) { + searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; + } + int mergedNodeCodePoints[MAX_WORD_LENGTH]; + int currentLength = 0; + int pos = getRootPosition(); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + while (currentLength <= length) { + // When foundMatchedNode becomes true, currentLength is increased at least once. + bool foundMatchedNode = false; + int totalChildCount = 0; + do { + const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( + mDictRoot, &pos); + totalChildCount += childCount; + if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { + // Invalid dictionary. + AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d", + childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP); + ASSERT(false); + return NOT_A_VALID_WORD_POS; + } + for (int i = 0; i < childCount; i++) { + nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(pos, MAX_WORD_LENGTH, + mergedNodeCodePoints); + if (nodeReader.isDeleted() || nodeReader.getCodePointCount() <= 0) { + // Skip deleted or empty node. + pos = nodeReader.getSiblingNodePos(); + continue; + } + bool matched = true; + for (int j = 0; j < nodeReader.getCodePointCount(); ++j) { + if (mergedNodeCodePoints[j] != searchCodePoints[currentLength + j]) { + // Different code point is found. + matched = false; + break; + } + } + if (matched) { + currentLength += nodeReader.getCodePointCount(); + if (length == currentLength) { + // Terminal position is found. + return nodeReader.getNodePos(); + } + if (!nodeReader.hasChildren()) { + return NOT_A_VALID_WORD_POS; + } + foundMatchedNode = true; + // Advance to the children nodes. + pos = nodeReader.getChildrenPos(); + break; + } + // Try next sibling node. + pos = nodeReader.getSiblingNodePos(); + } + if (foundMatchedNode) { + break; + } + // If the matched node is not found in the current node group, try to follow the + // forward link. + pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( + mDictRoot, pos); + } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos)); + if (!foundMatchedNode) { + // Matched node is not found. + return NOT_A_VALID_WORD_POS; + } + } + // If we already traversed the tree further than the word is long, there means + // there was no match (or we would have found it). + return NOT_A_VALID_WORD_POS; } -int DynamicPatriciaTriePolicy::getUnigramProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); +int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { + if (nodePos == NOT_A_VALID_WORD_POS) { + return NOT_A_PROBABILITY; + } + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; @@ -120,10 +202,11 @@ int DynamicPatriciaTriePolicy::getUnigramProbability( return nodeReader.getProbability(); } -int DynamicPatriciaTriePolicy::getShortcutPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); +int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { + if (nodePos == NOT_A_VALID_WORD_POS) { + return NOT_A_DICT_POS; + } + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; @@ -131,10 +214,11 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode( return nodeReader.getShortcutPos(); } -int DynamicPatriciaTriePolicy::getBigramsPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { - DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); +int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { + if (nodePos == NOT_A_VALID_WORD_POS) { + return NOT_A_DICT_POS; + } + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 39dfb86fd..e92672128 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -17,8 +17,10 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H +#include <stdint.h> + #include "defines.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { @@ -26,44 +28,41 @@ class BinaryDictionaryInfo; class DicNode; class DicNodeVector; -class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy { +class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - static AK_FORCE_INLINE const DynamicPatriciaTriePolicy *getInstance() { - return &sInstance; - } + DynamicPatriciaTriePolicy(const uint8_t *const dictRoot, + const BinaryDictionaryInfo *const binaryDictionaryInfo) + : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {} + + ~DynamicPatriciaTriePolicy() {} AK_FORCE_INLINE int getRootPosition() const { return 0; } void createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; - int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getUnigramProbability(const int nodePos) const; - int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getShortcutPositionOfNode(const int nodePos) const; - int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getBigramsPositionOfNode(const int nodePos) const; private: - DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTriePolicy); - static const DynamicPatriciaTriePolicy sInstance; + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); + static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - DynamicPatriciaTriePolicy() {} - ~DynamicPatriciaTriePolicy() {} + const uint8_t *const mDictRoot; + // TODO: remove + const BinaryDictionaryInfo *const mBinaryDictionaryInfo; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index f44c2651a..5398d7e37 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -39,8 +39,7 @@ class DynamicPatriciaTrieReadingUtils { static AK_FORCE_INLINE int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos) { - const int base = *pos; - return base + ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); + return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); } static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 097f7c86a..fd5f6e7dd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -27,48 +27,39 @@ namespace latinime { -const PatriciaTriePolicy PatriciaTriePolicy::sInstance; - void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } int nextPos = dicNode->getChildrenPos(); const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), &nextPos); + mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { - nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, - nodeFilter, childDicNodes); + nextPos = createAndGetLeavingChildNode(dicNode, nextPos, nodeFilter, childDicNodes); } } int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount( - binaryDictionaryInfo->getDictRoot(), nodePos, + return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(mDictRoot, nodePos, maxCodePointCount, outCodePoints, outUnigramProbability); } -int PatriciaTriePolicy::getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, +int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord, + return BinaryFormat::getTerminalPosition(mDictRoot, inWord, length, forceLowerCaseSearch); } -int PatriciaTriePolicy::getUnigramProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { +int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); if (!PatriciaTrieReadingUtils::isTerminal(flags)) { return NOT_A_PROBABILITY; } @@ -79,81 +70,74 @@ int PatriciaTriePolicy::getUnigramProbability( // for shortcuts). return NOT_A_PROBABILITY; } - PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); - return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); + return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } -int PatriciaTriePolicy::getShortcutPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { +int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { return NOT_A_DICT_POS; } - PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); + PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } return pos; } -int PatriciaTriePolicy::getBigramsPositionOfNode( - const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const { +int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { return NOT_A_DICT_POS; } - PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); + PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); + BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); } return pos; } int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, - const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo, - const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const { - const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); + const int nodePos, const NodeFilter *const childrenFilter, + DicNodeVector *childDicNodes) const { int pos = nodePos; const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int mergedNodeCodePoints[MAX_WORD_LENGTH]; const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); + mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos) + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos) : NOT_A_PROBABILITY; const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictRoot, flags, &pos) : NOT_A_DICT_POS; + mDictRoot, flags, &pos) : NOT_A_DICT_POS; if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); + BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( - binaryDictionaryInfo, &pos); + mBinaryDictionaryInfo, &pos); } if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 71f256eee..e1034127c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -17,52 +17,53 @@ #ifndef LATINIME_PATRICIA_TRIE_POLICY_H #define LATINIME_PATRICIA_TRIE_POLICY_H +#include <stdint.h> + #include "defines.h" -#include "suggest/core/policy/dictionary_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { -class PatriciaTriePolicy : public DictionaryStructurePolicy { +class BinaryDictionaryInfo; +class DicNode; +class DicNodeVector; + +class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() { - return &sInstance; - } + PatriciaTriePolicy(const uint8_t *const dictRoot, + const BinaryDictionaryInfo *const binaryDictionaryInfo) + : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {} + + ~PatriciaTriePolicy() {} AK_FORCE_INLINE int getRootPosition() const { return 0; } void createAndGetAllChildNodes(const DicNode *const dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; - int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getUnigramProbability(const int nodePos) const; - int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getShortcutPositionOfNode(const int nodePos) const; - int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const int nodePos) const; + int getBigramsPositionOfNode(const int nodePos) const; private: - DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy); - static const PatriciaTriePolicy sInstance; + DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); - PatriciaTriePolicy() {} - ~PatriciaTriePolicy() {} + const uint8_t *const mDictRoot; + // TODO: remove + const BinaryDictionaryInfo *const mBinaryDictionaryInfo; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; }; } // namespace latinime |