diff options
Diffstat (limited to 'native')
75 files changed, 2030 insertions, 1057 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index ca6a77997..55a5c06d7 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -57,6 +57,7 @@ LATIN_IME_CORE_SRC_FILES := \ bloom_filter.cpp \ dictionary.cpp \ digraph_utils.cpp \ + error_type_utils.cpp \ multi_bigram_map.cpp) \ $(addprefix suggest/core/layout/, \ additional_proximity_chars.cpp \ @@ -72,22 +73,28 @@ LATIN_IME_CORE_SRC_FILES := \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ - dictionary_structure_with_buffer_policy_factory.cpp \ + structure/dictionary_structure_with_buffer_policy_factory.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ + patricia_trie_policy.cpp \ + patricia_trie_reading_utils.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/v3/, \ dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_helper.cpp \ dynamic_patricia_trie_reading_utils.cpp \ dynamic_patricia_trie_writing_helper.cpp \ - dynamic_patricia_trie_writing_utils.cpp \ - patricia_trie_policy.cpp \ - patricia_trie_reading_utils.cpp) \ + dynamic_patricia_trie_writing_utils.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ + ver4_dict_constants.cpp \ + ver4_patricia_trie_policy.cpp) \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ dict_file_writing_utils.cpp \ forgetting_curve_utils.cpp \ - format_utils.cpp) \ + format_utils.cpp \ + mmapped_buffer.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ scoring_params.cpp \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 8f21c50ec..3becc7e39 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -25,7 +25,7 @@ #include "jni_common.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" @@ -86,11 +86,11 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy = + DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy( DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), - isUpdatable == JNI_TRUE); - if (!dictionaryStructureWithBufferPolicy) { + isUpdatable == JNI_TRUE)); + if (!dictionaryStructureWithBufferPolicy.get()) { return 0; } diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 742e388e4..fbcd612b7 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -392,24 +392,4 @@ typedef enum { // Create new word with space substitution CT_NEW_WORD_SPACE_SUBSTITUTION, } CorrectionType; - -// ErrorType is mainly decided by CorrectionType but it is also depending on if -// the correction has really been performed or not. -typedef enum { - // Substitution, omission and transposition - ET_EDIT_CORRECTION, - // Proximity error - ET_PROXIMITY_CORRECTION, - // Completion - ET_COMPLETION, - // New word - // TODO: Remove. - // A new word error should be an edit correction error or a proximity correction error. - ET_NEW_WORD, - // Treat error as an intentional omission when the CorrectionType is omission and the node can - // be intentional omission. - ET_INTENTIONAL_OMISSION, - // Not treated as an error. Tracked for checking exact match - ET_NOT_AN_ERROR -} ErrorType; #endif // LATINIME_DEFINES_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 49cfdecac..0b2b4a9e8 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -99,7 +99,7 @@ class DicNode { virtual ~DicNode() {} // Init for copy - void initByCopy(const DicNode *dicNode) { + void initByCopy(const DicNode *const dicNode) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init(&dicNode->mDicNodeProperties); @@ -107,25 +107,25 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // Init for root with prevWordNodePos which is used for bigram - void initAsRoot(const int rootGroupPos, const int prevWordNodePos) { + // Init for root with prevWordPtNodePos which is used for bigram + void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); - mDicNodeState.init(prevWordNodePos); + mDicNodeState.init(prevWordPtNodePos); PROF_NODE_RESET(mProfiler); } // Init for root with previous word - void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) { + void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -138,7 +138,7 @@ class DicNode { mDicNodeState.mDicNodeStatePrevWord.init( dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1, dicNode->mDicNodeProperties.getProbability(), - dicNode->mDicNodeProperties.getPos(), + dicNode->mDicNodeProperties.getPtNodePos(), dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord, dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), @@ -148,26 +148,27 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - void initAsPassingChild(DicNode *parentNode) { + void initAsPassingChild(DicNode *parentDicNode) { mIsUsed = true; - mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; - const int c = parentNode->getNodeTypedCodePoint(); - mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); - mDicNodeState.init(&parentNode->mDicNodeState); - PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); + mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; + const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); + mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); + mDicNodeState.init(&parentDicNode->mDicNodeState); + PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); } - void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void initAsChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability, - isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth); + mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], + probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, + newLeavingDepth); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -234,7 +235,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -246,29 +247,30 @@ class DicNode { } // Used to get bigram probability in DicNodeUtils - int getPos() const { - return mDicNodeProperties.getPos(); + int getPtNodePos() const { + return mDicNodeProperties.getPtNodePos(); } // Used to get bigram probability in DicNodeUtils - int getPrevWordPos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordTerminalPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } // Used in DicNodeUtils - int getChildrenPos() const { - return mDicNodeProperties.getChildrenPos(); + int getChildrenPtNodeArrayPos() const { + return mDicNodeProperties.getChildrenPtNodeArrayPos(); } int getProbability() const { return mDicNodeProperties.getProbability(); } - AK_FORCE_INLINE bool isTerminalWordNode() const { - const bool isTerminalNodes = mDicNodeProperties.isTerminal(); - const int currentNodeDepth = getNodeCodePointCount(); - const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth(); - return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; + AK_FORCE_INLINE bool isTerminalDicNode() const { + const bool isTerminalPtNode = mDicNodeProperties.isTerminal(); + const int currentDicNodeDepth = getNodeCodePointCount(); + const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth(); + return isTerminalPtNode && currentDicNodeDepth > 0 + && currentDicNodeDepth == terminalDicNodeDepth; } bool shouldBeFilteredBySafetyNetForBigram() const { @@ -374,8 +376,8 @@ class DicNode { } // Used to commit input partially - int getPrevWordNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } AK_FORCE_INLINE const int *getOutputWordBuf() const { @@ -410,7 +412,7 @@ class DicNode { // TODO: Remove once touch path is merged into ProximityInfoState // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph. int getNodeCodePoint() const { - const int codePoint = mDicNodeProperties.getNodeCodePoint(); + const int codePoint = mDicNodeProperties.getDicNodeCodePoint(); const DigraphUtils::DigraphCodePointIndex digraphIndex = mDicNodeState.mDicNodeStateScoring.getDigraphIndex(); if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) { @@ -423,8 +425,8 @@ class DicNode { // Utils for cost calculation // //////////////////////////////// AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const { - return mDicNodeProperties.getNodeCodePoint() - == dicNode->mDicNodeProperties.getNodeCodePoint(); + return mDicNodeProperties.getDicNodeCodePoint() + == dicNode->mDicNodeProperties.getDicNodeCodePoint(); } // TODO: remove @@ -574,7 +576,8 @@ class DicNode { // Caveat: Must not be called outside Weighting // This restriction is guaranteed by "friend" AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost, - const bool doNormalization, const int inputSize, const ErrorType errorType) { + const bool doNormalization, const int inputSize, + const ErrorTypeUtils::ErrorType errorType) { if (DEBUG_GEO_FULL) { LOGI_SHOW_ADD_COST_PROP; } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index ec65114c7..5540b6df5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -22,7 +22,6 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "utils/char_utils.h" namespace latinime { @@ -32,19 +31,20 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); + const int prevWordPtNodePos, DicNode *const newRootDicNode) { + newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *const prevWordLastNode, DicNode *const newRootNode) { - newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { + newRootDicNode->initAsRootWithPreviousWord( + prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); } -/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { - destNode->initByCopy(srcNode); +/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, + DicNode *const destDicNode) { + destDicNode->initByCopy(srcDicNode); } /////////////////////////////////// @@ -52,14 +52,14 @@ namespace latinime { /////////////////////////////////// /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNodeVector *childDicNodes) { + DicNodeVector *const childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { return; } if (!dicNode->isLeavingNode()) { childDicNodes->pushPassingChild(dicNode); } else { - dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); + dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); } } @@ -71,11 +71,11 @@ namespace latinime { */ /* static */ float DicNodeUtils::getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) @@ -85,19 +85,19 @@ namespace latinime { /* static */ int DicNodeUtils::getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - const int unigramProbability = node->getProbability(); - const int wordPos = node->getPos(); - const int prevWordPos = node->getPrevWordPos(); - if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + const int unigramProbability = dicNode->getProbability(); + const int ptNodePos = dicNode->getPtNodePos(); + const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); + if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, - wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, + prevWordTerminalPtNodePos, ptNodePos, unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); @@ -109,7 +109,7 @@ namespace latinime { // TODO: Move to char_utils? /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, int *dest) { + const int *const src1, const int16_t length1, int *const dest) { int actualLength0 = 0; for (int i = 0; i < length0; ++i) { if (src0[i] == 0) { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 3fb351a61..3f1514a52 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -31,20 +31,20 @@ class MultiBigramMap; class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, - const int16_t length1, int *dest); + const int16_t length1, int *const dest); static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *newRootNode); + const int prevWordPtNodePos, DicNode *const newRootDicNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *prevWordLastNode, DicNode *newRootNode); - static void initByCopy(DicNode *srcNode, DicNode *destNode); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); + static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); static void getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *childDicNodes); static float getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *const multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); @@ -53,7 +53,7 @@ class DicNodeUtils { static int getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 42addae8d..9364e7751 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -62,14 +62,14 @@ class DicNodeVector { mDicNodes.back().initAsPassingChild(dicNode); } - void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal, - hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability, + isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 9e0f62ceb..c41a7243a 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -24,15 +24,14 @@ namespace latinime { /** - * Node for traversing the lexicon trie. + * PtNode information related to the DicNode from the lexicon trie. */ -// TODO: Introduce a dictionary node class which has attribute members required to understand the -// dictionary structure. class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false), - mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} + : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), + mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), + mDepth(0), mLeavingDepth(0) {} virtual ~DicNodeProperties() {} @@ -40,57 +39,57 @@ class DicNodeProperties { void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth) { - mPos = pos; - mChildrenPos = childrenPos; - mNodeCodePoint = nodeCodePoint; + mPtNodePos = pos; + mChildrenPtNodeArrayPos = childrenPos; + mDicNodeCodePoint = nodeCodePoint; mProbability = probability; mIsTerminal = isTerminal; - mHasChildren = hasChildren; + mHasChildrenPtNodes = hasChildren; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; } // Init for copy - void init(const DicNodeProperties *const nodeProp) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = nodeProp->mNodeCodePoint; - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth; - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth; + mLeavingDepth = dicNodeProp->mLeavingDepth; } // Init as passing child - void init(const DicNodeProperties *const nodeProp, const int codePoint) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp, const int codePoint) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child + mLeavingDepth = dicNodeProp->mLeavingDepth; } - int getPos() const { - return mPos; + int getPtNodePos() const { + return mPtNodePos; } - int getChildrenPos() const { - return mChildrenPos; + int getChildrenPtNodeArrayPos() const { + return mChildrenPtNodeArrayPos; } int getProbability() const { return mProbability; } - int getNodeCodePoint() const { - return mNodeCodePoint; + int getDicNodeCodePoint() const { + return mDicNodeCodePoint; } uint16_t getDepth() const { @@ -107,7 +106,7 @@ class DicNodeProperties { } bool hasChildren() const { - return mHasChildren || mDepth != mLeavingDepth; + return mHasChildrenPtNodes || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { @@ -118,12 +117,12 @@ class DicNodeProperties { // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - int mPos; - int mChildrenPos; + int mPtNodePos; + int mChildrenPtNodeArrayPos; int mProbability; - int mNodeCodePoint; + int mDicNodeCodePoint; bool mIsTerminal; - bool mHasChildren; + bool mHasChildrenPtNodes; bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index b8986203d..dba57056b 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -30,7 +30,7 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { + mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); } @@ -41,7 +41,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -50,7 +50,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -60,7 +60,7 @@ class DicNodeStatePrevWord { mPrevWordCount = prevWord->mPrevWordCount; mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; - mPrevWordNodePos = prevWord->mPrevWordNodePos; + mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos; mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); } @@ -71,7 +71,7 @@ class DicNodeStatePrevWord { const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; int twoWordsLen = DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord); if (twoWordsLen >= MAX_WORD_LENGTH) { @@ -116,8 +116,8 @@ class DicNodeStatePrevWord { return mPrevWordStart; } - int getPrevWordNodePos() const { - return mPrevWordNodePos; + int getPrevWordPtNodePos() const { + return mPrevWordPtNodePos; } int getPrevWordCodePointAt(const int id) const { @@ -147,7 +147,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordLength; int16_t mPrevWordStart; int16_t mPrevWordProbability; - int mPrevWordNodePos; + int mPrevWordPtNodePos; int mSecondWordFirstInputIndex; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index 3c85d0e9d..74f9eee92 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/dictionary/digraph_utils.h" +#include "suggest/core/dictionary/error_type_utils.h" namespace latinime { @@ -31,7 +32,7 @@ class DicNodeStateScoring { mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX), mEditCorrectionCount(0), mProximityCorrectionCount(0), mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f), - mRawLength(0.0f), mExactMatch(true), + mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR), mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) { } @@ -47,7 +48,7 @@ class DicNodeStateScoring { mDoubleLetterLevel = NOT_A_DOUBLE_LETTER; mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX; mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING; - mExactMatch = true; + mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR; } AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) { @@ -59,34 +60,21 @@ class DicNodeStateScoring { mRawLength = scoring->mRawLength; mDoubleLetterLevel = scoring->mDoubleLetterLevel; mDigraphIndex = scoring->mDigraphIndex; - mExactMatch = scoring->mExactMatch; + mContainingErrorTypes = scoring->mContainingErrorTypes; mNormalizedCompoundDistanceAfterFirstWord = scoring->mNormalizedCompoundDistanceAfterFirstWord; } void addCost(const float spatialCost, const float languageCost, const bool doNormalization, - const int inputSize, const int totalInputIndex, const ErrorType errorType) { + const int inputSize, const int totalInputIndex, + const ErrorTypeUtils::ErrorType errorType) { addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex); - switch (errorType) { - case ET_EDIT_CORRECTION: - ++mEditCorrectionCount; - mExactMatch = false; - break; - case ET_PROXIMITY_CORRECTION: - ++mProximityCorrectionCount; - mExactMatch = false; - break; - case ET_COMPLETION: - mExactMatch = false; - break; - case ET_NEW_WORD: - mExactMatch = false; - break; - case ET_INTENTIONAL_OMISSION: - mExactMatch = false; - break; - case ET_NOT_AN_ERROR: - break; + mContainingErrorTypes = mContainingErrorTypes | errorType; + if (ErrorTypeUtils::isEditCorrectionError(errorType)) { + ++mEditCorrectionCount; + } + if (ErrorTypeUtils::isProximityCorrectionError(errorType)) { + ++mProximityCorrectionCount; } } @@ -182,7 +170,7 @@ class DicNodeStateScoring { } bool isExactMatch() const { - return mExactMatch; + return ErrorTypeUtils::isExactMatch(mContainingErrorTypes); } private: @@ -199,7 +187,8 @@ class DicNodeStateScoring { float mSpatialDistance; float mLanguageDistance; float mRawLength; - bool mExactMatch; + // All accumulated error types so far + ErrorTypeUtils::ErrorType mContainingErrorTypes; float mNormalizedCompoundDistanceAfterFirstWord; AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance, diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 71f4ef6ea..c2a15a312 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); @@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; - int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 59ead1894..7b83f27df 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -21,9 +21,7 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" @@ -35,22 +33,15 @@ namespace latinime { const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; -Dictionary::Dictionary(JNIEnv *env, - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) +Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr + &dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), - mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), + mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { logDictionaryInfo(env); } -Dictionary::~Dictionary() { - delete mBigramDictionary; - delete mGestureSuggest; - delete mTypingSuggest; - delete mDictionaryStructureWithBufferPolicy; -} - int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, @@ -60,7 +51,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession if (suggestOptions->isGesture()) { DicTraverseSession::initSessionInstance( traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); - result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, + result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); if (DEBUG_DICT) { @@ -70,7 +61,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } else { DicTraverseSession::initSessionInstance( traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); - result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, + result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); @@ -84,11 +75,12 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const { if (length <= 0) return 0; - return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); + return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies, + outputTypes); } int Dictionary::getProbability(const int *word, int length) const { - int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; @@ -98,39 +90,39 @@ int Dictionary::getProbability(const int *word, int length) const { int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, int length1) const { - return mBigramDictionary->getBigramProbability(word0, length0, word1, length1); + return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1); } void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { - mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability); + mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability); } void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, const int length1, const int probability) { - mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1, + mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1, probability); } void Dictionary::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); + mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1); } void Dictionary::flush(const char *const filePath) { - mDictionaryStructureWithBufferPolicy->flush(filePath); + mDictionaryStructureWithBufferPolicy.get()->flush(filePath); } void Dictionary::flushWithGC(const char *const filePath) { - mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); + mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath); } bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { - return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); + return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC); } void Dictionary::getProperty(const char *const query, char *const outResult, const int maxResultLength) { - return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); + return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, outResult, maxResultLength); } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 0195d5bf0..e52a40f1d 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -21,14 +21,16 @@ #include "defines.h" #include "jni.h" +#include "suggest/core/dictionary/bigram_dictionary.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/core/suggest_interface.h" +#include "utils/exclusive_ownership_pointer.h" namespace latinime { -class BigramDictionary; class DictionaryStructureWithBufferPolicy; class DicTraverseSession; class ProximityInfo; -class SuggestInterface; class SuggestOptions; class Dictionary { @@ -53,8 +55,8 @@ class Dictionary { static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000; - Dictionary(JNIEnv *env, - DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy); + Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr + &dictionaryStructureWithBufferPoilcy); int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, @@ -87,20 +89,22 @@ class Dictionary { const int maxResultLength); const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { - return mDictionaryStructureWithBufferPolicy; + return mDictionaryStructureWithBufferPolicy.get(); } - virtual ~Dictionary(); - private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); + typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr; + typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr; + static const int HEADER_ATTRIBUTE_BUFFER_SIZE; - DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; - const BigramDictionary *const mBigramDictionary; - const SuggestInterface *const mGestureSuggest; - const SuggestInterface *const mTypingSuggest; + const DictionaryStructureWithBufferPolicy::StructurePoilcyPtr + mDictionaryStructureWithBufferPolicy; + const BigramDictionaryPtr mBigramDictionary; + const SuggestInterfacePtr mGestureSuggest; + const SuggestInterfacePtr mTypingSuggest; void logDictionaryInfo(JNIEnv *const env) const; }; diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp new file mode 100644 index 000000000..0635fef7e --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/dictionary/error_type_utils.h" + +namespace latinime { + +const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40; +const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80; + +const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = + NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH; + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h new file mode 100644 index 000000000..ab4a65e48 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_ERROR_TYPE_UTILS_H +#define LATINIME_ERROR_TYPE_UTILS_H + +#include <stdint.h> + +#include "defines.h" + +namespace latinime { + +class ErrorTypeUtils { + public: + // ErrorType is mainly decided by CorrectionType but it is also depending on if + // the correction has really been performed or not. + typedef uint32_t ErrorType; + + static const ErrorType NOT_AN_ERROR; + static const ErrorType MATCH_WITH_CASE_ERROR; + static const ErrorType MATCH_WITH_ACCENT_ERROR; + static const ErrorType MATCH_WITH_DIGRAPH; + // Treat error as an intentional omission when the CorrectionType is omission and the node can + // be intentional omission. + static const ErrorType INTENTIONAL_OMISSION; + // Substitution, omission and transposition + static const ErrorType EDIT_CORRECTION; + // Proximity error + static const ErrorType PROXIMITY_CORRECTION; + // Completion + static const ErrorType COMPLETION; + // New word + // TODO: Remove. + // A new word error should be an edit correction error or a proximity correction error. + static const ErrorType NEW_WORD; + + // TODO: Differentiate errors. + static bool isExactMatch(const ErrorType containingErrors) { + return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0; + } + + static bool isEditCorrectionError(const ErrorType errorType) { + return (errorType & EDIT_CORRECTION) != 0; + } + + static bool isProximityCorrectionError(const ErrorType errorType) { + return (errorType & PROXIMITY_CORRECTION) != 0; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils); + + static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH; +}; +} // namespace latinime +#endif // LATINIME_ERROR_TYPE_UTILS_H diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp index e64476d82..ee8e59ef9 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info.cpp @@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, && sweetSpotCenterYs && sweetSpotRadii), mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE /* proximityCharsLength */]), - mCodeToKeyMap() { + mLowerCodePointToKeyMap() { /* Let's check the input array length here to make sure */ const jsize proximityCharsLength = env->GetArrayLength(proximityChars); if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) { @@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const { if (keyIndex < 0 || keyIndex >= KEY_COUNT) { return NOT_A_CODE_POINT; } - return mKeyIndexToCodePointG[keyIndex]; + return mKeyIndexToLowerCodePointG[keyIndex]; +} + +int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const { + if (keyIndex < 0 || keyIndex >= KEY_COUNT) { + return NOT_A_CODE_POINT; + } + return mKeyIndexToOriginalCodePoint[keyIndex]; } void ProximityInfo::initializeG() { @@ -164,8 +171,9 @@ void ProximityInfo::initializeG() { const float gapY = sweetSpotCenterY - mCenterYsG[i]; mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale); } - mCodeToKeyMap[lowerCode] = i; - mKeyIndexToCodePointG[i] = lowerCode; + mLowerCodePointToKeyMap[lowerCode] = i; + mKeyIndexToOriginalCodePoint[i] = code; + mKeyIndexToLowerCodePointG[i] = lowerCode; } for (int i = 0; i < KEY_COUNT; i++) { mKeyKeyDistancesG[i][i] = 0; diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h index f25949001..a91b9d674 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.h +++ b/native/jni/src/suggest/core/layout/proximity_info.h @@ -39,6 +39,7 @@ class ProximityInfo { float getNormalizedSquaredDistanceFromCenterFloatG( const int keyId, const int x, const int y, const bool isGeometric) const; int getCodePointOf(const int keyIndex) const; + int getOriginalCodePointOf(const int keyIndex) const; bool hasSweetSpotData(const int keyIndex) const { // When there are no calibration data for a key, // the radius of the key is assigned to zero. @@ -76,11 +77,11 @@ class ProximityInfo { ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates, inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights, mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH, - KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes); + KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes); } AK_FORCE_INLINE int getKeyIndexOf(const int c) const { - return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap); + return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap); } AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const { @@ -117,9 +118,9 @@ class ProximityInfo { // Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates. float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; - hash_map_compat<int, int> mCodeToKeyMap; - - int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; + hash_map_compat<int, int> mLowerCodePointToKeyMap; + int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD]; + int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD]; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index fbabd92f2..bb4b41714 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -30,6 +30,12 @@ namespace latinime { +int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const { + const int primaryCodePoint = getPrimaryCodePointAt(index); + const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint); + return mProximityInfo->getOriginalCodePointOf(keyIndex); +} + // TODO: Remove the dependency of "isGeometric" void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index c94060fa9..e941e43d8 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -65,6 +65,8 @@ class ProximityInfoState { return getProximityCodePointsAt(index)[0]; } + int getPrimaryOriginalCodePointAt(const int index) const; + inline bool sameAsTyped(const int *word, int length) const { if (length != mSampledInputSize) { return false; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 41f82049f..e649844dc 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -18,6 +18,7 @@ #define LATINIME_DICTIONARY_STRUCTURE_POLICY_H #include "defines.h" +#include "utils/exclusive_ownership_pointer.h" namespace latinime { @@ -33,18 +34,20 @@ class DictionaryShortcutsStructurePolicy; */ class DictionaryStructureWithBufferPolicy { public: + typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePoilcyPtr; + virtual ~DictionaryStructureWithBufferPolicy() {} virtual int getRootPosition() const = 0; - virtual void createAndGetAllChildNodes(const DicNode *const dicNode, + virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalNodePositionOfWord(const int *const inWord, + virtual int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; virtual int getProbability(const int unigramProbability, diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index 0c4016893..c202b81fe 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -20,6 +20,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_profiler.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/session/dic_traverse_session.h" namespace latinime { @@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n traverseSession, parentDicNode, dicNode, &inputStateG); const float languageCost = Weighting::getLanguageCost(weighting, correctionType, traverseSession, parentDicNode, dicNode, multiBigramMap); - const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession, - parentDicNode, dicNode); + const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType, + traverseSession, parentDicNode, dicNode); profile(correctionType, dicNode); if (inputStateG.mNeedsToUpdateInputStateG) { dicNode->updateInputIndexG(&inputStateG); diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h index 2d49e98a6..bd6b3cf41 100644 --- a/native/jni/src/suggest/core/policy/weighting.h +++ b/native/jni/src/suggest/core/policy/weighting.h @@ -18,6 +18,7 @@ #define LATINIME_WEIGHTING_H #include "defines.h" +#include "suggest/core/dictionary/error_type_utils.h" namespace latinime { @@ -84,7 +85,7 @@ class Weighting { virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const = 0; - virtual ErrorType getErrorType(const CorrectionType correctionType, + virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 50f2bbd8d..5070491f4 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPos == NOT_A_DICT_POS) { + if (mPrevWordPtNodePos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index e0b1c67d9..6e4dda44d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -59,7 +59,7 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), + : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0), mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { @@ -86,11 +86,9 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPos() const { return mPrevWordPos; } + int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; } // TODO: REMOVE - void setPrevWordPos(int pos) { mPrevWordPos = pos; } - // TODO: Use proper parameter when changed - int getDicRootPos() const { return 0; } + void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -119,26 +117,13 @@ class DicTraverseSession { return true; } - void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const { - for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { - if (!mProximityInfoStates[i].isUsed()) { - continue; - } - const int pointerId = node->getInputIndex(i); - const std::vector<int> *const searchKeyVector = - mProximityInfoStates[i].getSearchKeyVector(pointerId); - outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(), - searchKeyVector->end()); - } - } - - ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const { + ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const { ProximityType proximityType = UNRELATED_CHAR; for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { if (!mProximityInfoStates[i].isUsed()) { continue; } - const int pointerId = node->getInputIndex(i); + const int pointerId = dicNode->getInputIndex(i); proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint); ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR); // TODO: Make this more generic @@ -192,7 +177,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordPos; + int mPrevWordPtNodePos; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 73ccebc88..2eda414f4 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Continue suggestion after partial commit. DicNode *topDicNode = traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); - traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); + traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos()); traverseSession->getDicTraverseCache()->continueSearch(); traverseSession->setPartiallyCommited(); } @@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), - traverseSession->getPrevWordPos(), &rootNode); + traverseSession->getPrevWordPtNodePos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } } @@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen BinaryDictionaryShortcutIterator shortcutIt( traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), traverseSession->getDictionaryStructurePolicy() - ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); + ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); @@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } break; case UNRELATED_CHAR: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; default: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; } } - // Push the node for look-ahead correction + // Push the dicNode for look-ahead correction if (allowsErrorCorrections && canDoLookAheadCorrection) { traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); } @@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode( if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { return; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return; } if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { @@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode( /** * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word - * (by the space omission error correction) search path if input dicNode is on a terminal node. + * (by the space omission error correction) search path if input dicNode is on a terminal. */ void Suggest::processExpandedDicNode( DicTraverseSession *traverseSession, DicNode *dicNode) const { @@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, processExpandedDicNode(traverseSession, childDicNode); } -// Process the node codepoint as a digraph. This means that composite glyphs like the German +// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, @@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, /** * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider * matches for all possible next letters. Note that just skipping the current letter without any - * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check + * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check * the possible *next* letters after the omission to better limit search to plausible omissions. * Note that apostrophes are handled as omissions. */ @@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, } /** - * Weight child node by aligning it to the key + * Weight child dicNode by aligning it to the key */ void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int inputSize = traverseSession->getInputSize(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index 1926b9831..de9fc9bbc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index b1170e251..83a32fb0b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -17,8 +17,8 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -157,8 +157,9 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( } const int bigramTargetNodePos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); - if (nodeReader.isDeleted() || !nodeReader.isTerminal() + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos( + bigramTargetNodePos)); + if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal() || bigramTargetNodePos == NOT_A_DICT_POS) { // The target is no longer valid terminal. Invalidate the current bigram entry. if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, @@ -342,20 +343,22 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( if (originalBigramPos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - int currentPos = originalBigramPos; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); + int currentPos = NOT_A_DICT_POS; int bigramLinkCount = 0; - while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { - currentPos = nodeReader.getBigramLinkedNodePos(); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); + int bigramLinkedNodePos = originalBigramPos; + do { + currentPos = bigramLinkedNodePos; + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos)); + bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos(); bigramLinkCount++; if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); ASSERT(false); return NOT_A_DICT_POS; } - } + bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos(); + } while (bigramLinkedNodePos != NOT_A_DICT_POS); return currentPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 0504b59d5..5de456656 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -22,7 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp deleted file mode 100644 index ff80dd2f6..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" - -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/utils/format_utils.h" -#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" - -namespace latinime { - -/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset, - const int size, const bool isUpdatable) { - // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of - // impl classes of DictionaryStructureWithBufferPolicy. - const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size, - isUpdatable); - if (!mmapedBuffer) { - return 0; - } - switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(), - mmapedBuffer->getBufferSize())) { - case FormatUtils::VERSION_2: - return new PatriciaTriePolicy(mmapedBuffer); - case FormatUtils::VERSION_3: - return new DynamicPatriciaTriePolicy(mmapedBuffer); - default: - AKLOGE("DICT: dictionary format is unknown, bad magic number"); - delete mmapedBuffer; - ASSERT(false); - return 0; - } -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp deleted file mode 100644 index 2fa3111d3..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" - -#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { - if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { - // Reading invalid position because of bug or broken dictionary. - AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", - ptNodePos, mBuffer->getTailPosition()); - ASSERT(false); - invalidatePtNodeInfo(); - return; - } - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - int pos = ptNodePos; - mHeadPos = ptNodePos; - if (usesAdditionalBuffer) { - pos -= mBuffer->getOriginalBufferSize(); - } - mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const int parentPosOffset = - DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, - &pos); - mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos); - if (outCodePoints != 0) { - mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); - } else { - mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( - dictBuf, mFlags, MAX_WORD_LENGTH, &pos); - } - if (isTerminal()) { - mProbabilityFieldPos = pos; - if (usesAdditionalBuffer) { - mProbabilityFieldPos += mBuffer->getOriginalBufferSize(); - } - mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); - } else { - mProbabilityFieldPos = NOT_A_DICT_POS; - mProbability = NOT_A_PROBABILITY; - } - mChildrenPosFieldPos = pos; - if (usesAdditionalBuffer) { - mChildrenPosFieldPos += mBuffer->getOriginalBufferSize(); - } - mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - dictBuf, &pos); - if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { - mChildrenPos += mBuffer->getOriginalBufferSize(); - } - if (mSiblingPos == NOT_A_DICT_POS) { - if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { - mBigramLinkedNodePos = mChildrenPos; - } else { - mBigramLinkedNodePos = NOT_A_DICT_POS; - } - } - if (usesAdditionalBuffer) { - pos += mBuffer->getOriginalBufferSize(); - } - if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { - mShortcutPos = pos; - mShortcutsPolicy->skipAllShortcuts(&pos); - } else { - mShortcutPos = NOT_A_DICT_POS; - } - if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { - mBigramPos = pos; - mBigramsPolicy->skipAllBigrams(&pos); - } else { - mBigramPos = NOT_A_DICT_POS; - } - // Update siblingPos if needed. - if (mSiblingPos == NOT_A_DICT_POS) { - // Sibling position is the tail position of current node. - mSiblingPos = pos; - } - // Read destination node if the read node is a moved node. - if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { - // The destination position is stored at the same place as the parent position. - fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount, - outCodePoints); - } -} - -void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() { - mHeadPos = NOT_A_DICT_POS; - mFlags = 0; - mParentPos = NOT_A_DICT_POS; - mCodePointCount = 0; - mProbabilityFieldPos = NOT_A_DICT_POS; - mProbability = NOT_A_PROBABILITY; - mChildrenPosFieldPos = NOT_A_DICT_POS; - mChildrenPos = NOT_A_DICT_POS; - mBigramLinkedNodePos = NOT_A_DICT_POS; - mShortcutPos = NOT_A_DICT_POS; - mBigramPos = NOT_A_DICT_POS; - mSiblingPos = NOT_A_DICT_POS; -} - -} diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h deleted file mode 100644 index 3b36d425f..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H -#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" - -namespace latinime { - -class BufferWithExtendableBuffer; -class DictionaryBigramsStructurePolicy; -class DictionaryShortcutsStructurePolicy; - -/* - * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved - * node and reads node attributes. - */ -class DynamicPatriciaTrieNodeReader { - public: - DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, - const DictionaryBigramsStructurePolicy *const bigramsPolicy, - const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) - : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), - mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0), - mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), - mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), - mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_DICT_POS) {} - - ~DynamicPatriciaTrieNodeReader() {} - - // Reads PtNode information from dictionary buffer and updates members with the information. - AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) { - fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos , - 0 /* maxCodePointCount */, 0 /* outCodePoints */); - } - - AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { - mSiblingPos = NOT_A_DICT_POS; - mBigramLinkedNodePos = NOT_A_DICT_POS; - fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints); - } - - // HeadPos is different from NodePos when the current PtNode is a moved PtNode. - AK_FORCE_INLINE int getHeadPos() const { - return mHeadPos; - } - - // Flags - AK_FORCE_INLINE bool isDeleted() const { - return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags); - } - - AK_FORCE_INLINE bool hasChildren() const { - return mChildrenPos != NOT_A_DICT_POS; - } - - AK_FORCE_INLINE bool isTerminal() const { - return PatriciaTrieReadingUtils::isTerminal(mFlags); - } - - AK_FORCE_INLINE bool isBlacklisted() const { - return PatriciaTrieReadingUtils::isBlacklisted(mFlags); - } - - AK_FORCE_INLINE bool isNotAWord() const { - return PatriciaTrieReadingUtils::isNotAWord(mFlags); - } - - // Parent node position - AK_FORCE_INLINE int getParentPos() const { - return mParentPos; - } - - // Number of code points - AK_FORCE_INLINE uint8_t getCodePointCount() const { - return mCodePointCount; - } - - // Probability - AK_FORCE_INLINE int getProbabilityFieldPos() const { - return mProbabilityFieldPos; - } - - AK_FORCE_INLINE int getProbability() const { - return mProbability; - } - - // Children PtNode array position - AK_FORCE_INLINE int getChildrenPosFieldPos() const { - return mChildrenPosFieldPos; - } - - AK_FORCE_INLINE int getChildrenPos() const { - return mChildrenPos; - } - - // Bigram linked node position. - AK_FORCE_INLINE int getBigramLinkedNodePos() const { - return mBigramLinkedNodePos; - } - - // Shortcutlist position - AK_FORCE_INLINE int getShortcutPos() const { - return mShortcutPos; - } - - // Bigrams position - AK_FORCE_INLINE int getBigramsPos() const { - return mBigramPos; - } - - // Sibling node position - AK_FORCE_INLINE int getSiblingNodePos() const { - return mSiblingPos; - } - - private: - DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); - - const BufferWithExtendableBuffer *const mBuffer; - const DictionaryBigramsStructurePolicy *const mBigramsPolicy; - const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - int mHeadPos; - DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; - int mParentPos; - uint8_t mCodePointCount; - int mProbabilityFieldPos; - int mProbability; - int mChildrenPosFieldPos; - int mChildrenPos; - int mBigramLinkedNodePos; - int mShortcutPos; - int mBigramPos; - int mSiblingPos; - - void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, - const int maxCodePointCount, int *const outCodePoints); - - void invalidatePtNodeInfo(); -}; -} // namespace latinime -#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index a9c7805a8..7c06a7117 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -30,8 +30,8 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: // Reads information from existing dictionary buffer. - HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion) + : mDictFormatVersion(formatVersion), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 5ded8f6a1..5ef8e50b4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -118,6 +118,9 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY case FormatUtils::VERSION_3: return buffer->writeUintAndAdvancePosition(3 /* data */, HEADER_DICTIONARY_VERSION_SIZE, writingPos); + case FormatUtils::VERSION_4: + return buffer->writeUintAndAdvancePosition(4 /* data */, + HEADER_DICTIONARY_VERSION_SIZE, writingPos); default: return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp new file mode 100644 index 000000000..3ab6a8e21 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" + +#include <stdint.h> +#include <string> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { + +/* static */ DictionaryStructureWithBufferPolicy::StructurePoilcyPtr + DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructureWithBufferPolicy(const char *const path, + const int bufOffset, const int size, const bool isUpdatable) { + // Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of + // MmappedBufferWrapper if the instance has the responsibility. + MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size, + isUpdatable)); + if (!mmappedBuffer.get()) { + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); + } + switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(), + mmappedBuffer.get()->getBufferSize())) { + case FormatUtils::VERSION_2: + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr( + new PatriciaTriePolicy(mmappedBuffer)); + case FormatUtils::VERSION_3: + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr( + new DynamicPatriciaTriePolicy(mmappedBuffer)); + case FormatUtils::VERSION_4: { + std::string dictDirPath(path); + const std::string::size_type pos = + dictDirPath.rfind(Ver4DictConstants::TRIE_FILE_EXTENSION); + if (pos == std::string::npos) { + // Dictionary file name is not valid as a version 4 dictionary. + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); + } + // Removing extension to get the base path. + dictDirPath.erase(pos); + const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( + Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer)); + if (!dictBuffers.get()->isValid()) { + AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements."); + ASSERT(false); + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); + } + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr( + new Ver4PatriciaTriePolicy(dictBuffers)); + } + default: + AKLOGE("DICT: dictionary format is unknown, bad magic number"); + ASSERT(false); + return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h index 8cebc3b16..1359575f1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h @@ -21,13 +21,15 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "utils/exclusive_ownership_pointer.h" namespace latinime { class DictionaryStructureWithBufferPolicyFactory { public: - static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy( - const char *const path, const int bufOffset, const int size, const bool isUpdatable); + static DictionaryStructureWithBufferPolicy::StructurePoilcyPtr + newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset, + const int size, const bool isUpdatable); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h new file mode 100644 index 000000000..7bdd829cd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_PARAMS_H +#define LATINIME_PT_NODE_PARAMS_H + +#include <cstring> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +// This class has information of a PtNode. This class is immutable. +class PtNodeParams { + public: + // Invalid PtNode. + PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), + mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), + mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mSiblingPos(NOT_A_DICT_POS) {} + + PtNodeParams(const PtNodeParams& ptNodeParams) + : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags), + mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount), + mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos), + mTerminalId(ptNodeParams.mTerminalId), + mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos), + mProbability(ptNodeParams.mProbability), + mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos), + mChildrenPos(ptNodeParams.mChildrenPos), + mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos), + mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos), + mSiblingPos(ptNodeParams.mSiblingPos) { + memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); + } + + PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, + const int parentPos, const int codePointCount, const int *const codePoints, + const int probabilityFieldPos, const int probability, const int childrenPosFieldPos, + const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos, + const int bigramPos, const int siblingPos) + : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), + mProbabilityFieldPos(probabilityFieldPos), mProbability(probability), + mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), + mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos), + mBigramPos(bigramPos), mSiblingPos(siblingPos) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + + AK_FORCE_INLINE bool isValid() const { + return mCodePointCount > 0; + } + + // Head position of the PtNode + AK_FORCE_INLINE int getHeadPos() const { + return mHeadPos; + } + + // Flags + AK_FORCE_INLINE bool isDeleted() const { + return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags); + } + + AK_FORCE_INLINE bool hasChildren() const { + return mChildrenPos != NOT_A_DICT_POS; + } + + AK_FORCE_INLINE bool isTerminal() const { + return PatriciaTrieReadingUtils::isTerminal(mFlags); + } + + AK_FORCE_INLINE bool isBlacklisted() const { + return PatriciaTrieReadingUtils::isBlacklisted(mFlags); + } + + AK_FORCE_INLINE bool isNotAWord() const { + return PatriciaTrieReadingUtils::isNotAWord(mFlags); + } + + // Parent node position + AK_FORCE_INLINE int getParentPos() const { + return mParentPos; + } + + // Number of code points + AK_FORCE_INLINE uint8_t getCodePointCount() const { + return mCodePointCount; + } + + AK_FORCE_INLINE const int *getCodePoints() const { + return mCodePoints; + } + + // Probability + AK_FORCE_INLINE int getTerminalIdFieldPos() const { + return mTerminalIdFieldPos; + } + + AK_FORCE_INLINE int getTerminalId() const { + return mTerminalId; + } + + // Probability + AK_FORCE_INLINE int getProbabilityFieldPos() const { + return mProbabilityFieldPos; + } + + AK_FORCE_INLINE int getProbability() const { + return mProbability; + } + + // Children PtNode array position + AK_FORCE_INLINE int getChildrenPosFieldPos() const { + return mChildrenPosFieldPos; + } + + AK_FORCE_INLINE int getChildrenPos() const { + return mChildrenPos; + } + + // Bigram linked node position. + AK_FORCE_INLINE int getBigramLinkedNodePos() const { + return mBigramLinkedNodePos; + } + + // Shortcutlist position + AK_FORCE_INLINE int getShortcutPos() const { + return mShortcutPos; + } + + // Bigrams position + AK_FORCE_INLINE int getBigramsPos() const { + return mBigramPos; + } + + // Sibling node position + AK_FORCE_INLINE int getSiblingNodePos() const { + return mSiblingPos; + } + + private: + // This class have a public copy constructor to be used as a return value. + + // Disallowing the assignment operator. + PtNodeParams &operator=(PtNodeParams &ptNodeParams); + + const int mHeadPos; + const PatriciaTrieReadingUtils::NodeFlags mFlags; + const int mParentPos; + const uint8_t mCodePointCount; + int mCodePoints[MAX_WORD_LENGTH]; + const int mTerminalIdFieldPos; + const int mTerminalId; + const int mProbabilityFieldPos; + const int mProbability; + const int mChildrenPosFieldPos; + const int mChildrenPos; + const int mBigramLinkedNodePos; + const int mShortcutPos; + const int mBigramPos; + const int mSiblingPos; +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_PARAMS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h new file mode 100644 index 000000000..c6b2a8bed --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_READER_H +#define LATINIME_PT_NODE_READER_H + +#include "defines.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" + +namespace latinime { + +// Interface class used to read PtNode information. +class PtNodeReader { + public: + virtual ~PtNodeReader() {} + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0; + + protected: + PtNodeReader() {}; + + private: + DISALLOW_COPY_AND_ASSIGN(PtNodeReader); +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 8a84bd261..960c1b936 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -15,22 +15,22 @@ */ -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { -void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, +void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - int nextPos = dicNode->getChildrenPos(); + int nextPos = dicNode->getChildrenPtNodeArrayPos(); if (nextPos < 0 || nextPos >= mDictBufferSize) { AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", nextPos, mDictBufferSize); @@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, -// it is possible to check for this with advantageous complexity. For each node, we search +// it is possible to check for this with advantageous complexity. For each PtNode array, we search // for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a // PtNode array with the last PtNode's children position still less than what we are searching for, // we must descend the last PtNode's children (for example, if the word we are searching for starts // with a z, it's the last PtNode of the root array, so all children addresses will be smaller -// than the position we look for, and we have to descend the z node). +// than the position we look for, and we have to descend the z PtNode). /* Parameters : * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) @@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will - // only traverse nodes that are actually a part of the terminal we are searching, so each time - // we enter this loop we are one depth level further than last time. - // The only reason we count nodes is because we want to reduce the probability of infinite + // only traverse PtNodes that are actually a part of the terminal we are searching, so each + // time we enter this loop we are one depth level further than last time. + // The only reason we count PtNodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { @@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = true; } else if (1 >= ptNodeCount) { // However if we are on the LAST PtNode of this array, and we have NOT shot the - // position we should descend THIS node. So we trick the lastCandidatePtNodePos - // so that we will descend this PtNode, not the previous one. + // position we should descend THIS PtNode. So we trick the + // lastCandidatePtNodePos so that we will descend this PtNode, not the previous + // one. lastCandidatePtNodePos = startPos; found = true; } else { @@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = false; } } else { - // Even if we don't have children here, we could still be on the last PtNode of / + // Even if we don't have children here, we could still be on the last PtNode of // this array. If this is the case, we should descend the last PtNode that had // children, and their position is already in lastCandidatePtNodePos. found = (1 >= ptNodeCount); @@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( return 0; } -// This function gets the position of the terminal node of the exact matching word in the +// This function gets the position of the terminal PtNode of the exact matching word in the // dictionary. If no match is found, it returns NOT_A_DICT_POS. -int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, +int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); int wordPos = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 0f8662aea..5d99632a5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -33,28 +34,26 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - PatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), - mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), - mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + PatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer) + : mMmappedBuffer(mmappedBuffer), + mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_2), + mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()), + mDictBufferSize(mMmappedBuffer.get()->getBufferSize() + - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} - ~PatriciaTriePolicy() { - delete mBuffer; - } - AK_FORCE_INLINE int getRootPosition() const { return 0; } - void createAndGetAllChildNodes(const DicNode *const dicNode, + void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord(const int *const inWord, + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; @@ -124,7 +123,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); - const MmappedBuffer *const mBuffer; + const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; const int mDictBufferSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp index 7df55815f..82b3593c8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h index 8420ee95a..8420ee95a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index 5724c5d88..db4e86da1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -14,25 +14,25 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { bool DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. - bool isUselessPtNode = !node->isTerminal(); - if (node->isTerminal() && mIsDecayingDict) { + bool isUselessPtNode = !ptNodeParams->isTerminal(); + if (ptNodeParams->isTerminal() && mIsDecayingDict) { const int newProbability = - ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(), + ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(), mHeaderPolicy); - int writingPos = node->getProbabilityFieldPos(); + int writingPos = ptNodeParams->getProbabilityFieldPos(); // Update probability. if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( mBuffer, newProbability, &writingPos)) { @@ -44,9 +44,9 @@ bool DynamicPatriciaTrieGcEventListeners } if (mChildrenValue > 0) { isUselessPtNode = false; - } else if (node->isTerminal()) { + } else if (ptNodeParams->isTerminal()) { // Remove children as all children are useless. - int writingPos = node->getChildrenPosFieldPos(); + int writingPos = ptNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) { return false; @@ -54,12 +54,12 @@ bool DynamicPatriciaTrieGcEventListeners } if (isUselessPtNode) { // Current PtNode is no longer needed. Mark it as deleted. - if (!mWritingHelper->markNodeAsDeleted(node)) { + if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) { return false; } } else { mValueStack.back() += 1; - if (node->isTerminal()) { + if (ptNodeParams->isTerminal()) { mValidUnigramCount += 1; } } @@ -67,10 +67,9 @@ bool DynamicPatriciaTrieGcEventListeners } bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { - if (!node->isDeleted()) { - int pos = node->getBigramsPos(); + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { + if (!ptNodeParams->isDeleted()) { + int pos = ptNodeParams->getBigramsPos(); if (pos != NOT_A_DICT_POS) { int bigramEntryCount = 0; if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, @@ -117,31 +116,29 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo // Write valid PtNode to buffer and memorize mapping from the old position to the new position. bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { - if (node->isDeleted()) { + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { + if (ptNodeParams->isDeleted()) { // Current PtNode is not written in new buffer because it has been deleted. mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( - node->getHeadPos(), NOT_A_DICT_POS)); + ptNodeParams->getHeadPos(), NOT_A_DICT_POS)); return true; } int writingPos = mBufferToWrite->getTailPosition(); mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( - node->getHeadPos(), writingPos)); + ptNodeParams->getHeadPos(), writingPos)); mValidPtNodeCount++; // Writes current PtNode. - return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, - node->getParentPos(), nodeCodePoints, node->getCodePointCount(), - node->getProbability(), &writingPos); + return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams, + ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(), + ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos); } bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { // Updates parent position. - int parentPos = node->getParentPos(); + int parentPos = ptNodeParams->getParentPos(); if (parentPos != NOT_A_DICT_POS) { DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); @@ -149,15 +146,16 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField parentPos = it->second; } } - int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + int writingPos = ptNodeParams->getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; // Write updated parent offset. if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite, - parentPos, node->getHeadPos(), &writingPos)) { + parentPos, ptNodeParams->getHeadPos(), &writingPos)) { return false; } // Updates children position. - int childrenPos = node->getChildrenPos(); + int childrenPos = ptNodeParams->getChildrenPos(); if (childrenPos != NOT_A_DICT_POS) { DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it = mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); @@ -165,14 +163,14 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField childrenPos = it->second; } } - writingPos = node->getChildrenPosFieldPos(); + writingPos = ptNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite, childrenPos, &writingPos)) { return false; } // Updates bigram target PtNode positions in the bigram list. - int bigramsPos = node->getBigramsPos(); + int bigramsPos = ptNodeParams->getBigramsPos(); if (bigramsPos != NOT_A_DICT_POS) { int bigramEntryCount; if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, @@ -181,7 +179,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField } mBigramCount += bigramEntryCount; } - if (node->isTerminal()) { + if (ptNodeParams->isTerminal()) { mUnigramCount++; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h index 9755120b0..cfe3c145c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h @@ -21,15 +21,16 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "utils/hash_map_compat.h" namespace latinime { class DictionaryHeaderStructurePolicy; +class PtNodeParams; class DynamicPatriciaTrieGcEventListeners { public: @@ -66,8 +67,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); int getValidUnigramCount() const { return mValidUnigramCount; @@ -101,8 +101,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); int getValidBigramEntryCount() const { return mValidBigramEntryCount; @@ -133,8 +132,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail(); - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); @@ -167,8 +165,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); int getUnigramCount() const { return mUnigramCount; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..3393ce662 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" + +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( + const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const { + if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mBuffer->getTailPosition()); + ASSERT(false); + return PtNodeParams(); + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int pos = ptNodePos; + const int headPos = ptNodePos; + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const int parentPosOffset = + DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, + &pos); + const int parentPos = + DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos); + int codePoints[MAX_WORD_LENGTH]; + const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); + int probability = NOT_A_PROBABILITY; + int probabilityFieldPos = NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + probabilityFieldPos = pos; + if (usesAdditionalBuffer) { + probabilityFieldPos += mBuffer->getOriginalBufferSize(); + } + probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); + } + int childrenPosFieldPos = pos; + if (usesAdditionalBuffer) { + childrenPosFieldPos += mBuffer->getOriginalBufferSize(); + } + int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + dictBuf, &pos); + if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) { + childrenPos += mBuffer->getOriginalBufferSize(); + } + int newBigramLinkedNodePos = bigramLinkedNodePos; + if (siblingNodePos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + newBigramLinkedNodePos = childrenPos; + } + } + if (usesAdditionalBuffer) { + pos += mBuffer->getOriginalBufferSize(); + } + int shortcutsPos = NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { + shortcutsPos = pos; + mShortcutsPolicy->skipAllShortcuts(&pos); + } + int bigramsPos = NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::hasBigrams(flags)) { + bigramsPos = pos; + mBigramsPolicy->skipAllBigrams(&pos); + } + int newSiblingNodePos = siblingNodePos; + if (siblingNodePos == NOT_A_DICT_POS) { + // Sibling position is the tail position of current node. + newSiblingNodePos = pos; + } + // Read destination node if the read node is a moved node. + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + // The destination position is stored at the same place as the parent position. + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos, + newBigramLinkedNodePos); + } else { + return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, + probabilityFieldPos, probability, childrenPosFieldPos, childrenPos, + newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos); + } +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h new file mode 100644 index 000000000..b5abffeda --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; + +/* + * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved + * node and reads node attributes. + */ +class DynamicPatriciaTrieNodeReader : public PtNodeReader { + public: + DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, + const DictionaryBigramsStructurePolicy *const bigramsPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) + : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), + mShortcutsPolicy(shortcutsPolicy) {} + + ~DynamicPatriciaTrieNodeReader() {} + + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const { + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, + NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */); + } + + private: + DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); + + const BufferWithExtendableBuffer *const mBuffer; + const DictionaryBigramsStructurePolicy *const mBigramsPolicy; + const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; + + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const int siblingNodePos, const int bigramLinkedNodePos) const; +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index 495b146c2..50882b3e9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" #include <cstdio> #include <cstring> @@ -23,11 +23,11 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -45,29 +45,32 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024 const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; -void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, +void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos()); - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); while (!readingHelper.isEnd()) { - bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted(); + const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } + bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); if (isTerminal && mHeaderPolicy.isDecayingDict()) { // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a // valid terminal DicNode. - isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY) + isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY) != NOT_A_PROBABILITY; } - childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), - nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal, - nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), - nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); - readingHelper.readNextSiblingNode(); + childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.hasChildren(), + ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(), + ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + readingHelper.readNextSiblingNode(ptNodeParams); } } @@ -77,29 +80,33 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun // This method traverses parent nodes from the terminal by following parent pointers; thus, // node code points are stored in the buffer in the reverse order. int reverseCodePoints[maxCodePointCount]; - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); // First, read the terminal node and get its probability. readingHelper.initWithPtNodePos(ptNodePos); - if (!readingHelper.isValidTerminalNode()) { + + const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams()); + if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) { // Node at the ptNodePos is not a valid terminal node. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Store terminal node probability. - *outUnigramProbability = readingHelper.getNodeReader()->getProbability(); + *outUnigramProbability = terminalPtNodeParams.getProbability(); // Then, following parent node link to the dictionary root and fetch node code points. + int totalCodePointCount = 0; while (!readingHelper.isEnd()) { - if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { + const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); + totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams); + if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) { // The ptNodePos is not a valid terminal node position in the dictionary. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Store node code points to buffer in the reverse order. - readingHelper.fetchMergedNodeCodePointsInReverseOrder( + readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); // Follow parent node toward the root node. - readingHelper.readParentNode(); + readingHelper.readParentNode(ptNodeParams); } if (readingHelper.isError()) { // The node position or the dictionary is invalid. @@ -107,52 +114,54 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun return 0; } // Reverse the stored code points to output them. - const int codePointCount = readingHelper.getTotalCodePointCount(); - for (int i = 0; i < codePointCount; ++i) { - outCodePoints[i] = reverseCodePoints[codePointCount - i - 1]; + for (int i = 0; i < totalCodePointCount; ++i) { + outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1]; } - return codePointCount; + return totalCodePointCount; } -int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, +int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int searchCodePoints[length]; for (int i = 0; i < length; ++i) { searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { + const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); - if (readingHelper.getTotalCodePointCount() > length - || !readingHelper.isMatchedCodePoint(0 /* index */, + if (readingHelper.getTotalCodePointCount(ptNodeParams) > length + || !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */, searchCodePoints[matchedCodePointCount])) { // Current node has too many code points or its first code point is different from // target code point. Skip this node and read the next sibling node. - readingHelper.readNextSiblingNode(); + readingHelper.readNextSiblingNode(ptNodeParams); continue; } // Check following merged node code points. - const int nodeCodePointCount = nodeReader->getCodePointCount(); + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { - if (!readingHelper.isMatchedCodePoint( + if (!readingHelper.isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) { // Different code point is found. The given word is not included in the dictionary. return NOT_A_DICT_POS; } } // All characters are matched. - if (length == readingHelper.getTotalCodePointCount()) { + if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) { // Terminal position is found. - return nodeReader->getHeadPos(); + return ptNodeParams.getHeadPos(); } - if (!nodeReader->hasChildren()) { + if (!ptNodeParams.hasChildren()) { return NOT_A_DICT_POS; } // Advance to the children nodes. - readingHelper.readChildNode(); + readingHelper.readChildNode(ptNodeParams); } // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). @@ -179,44 +188,38 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } - return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (nodeReader.isDeleted()) { + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { return NOT_A_DICT_POS; } - return nodeReader.getShortcutPos(); + return ptNodeParams.getShortcutPos(); } int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (nodeReader.isDeleted()) { + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { return NOT_A_DICT_POS; } - return nodeReader.getBigramsPos(); + return ptNodeParams.getBigramsPos(); } bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, const int probability) { - if (!mBuffer->isUpdatable()) { + if (!mMmappedBuffer.get()->isUpdatable()) { AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } @@ -225,8 +228,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int AKLOGE("The dictionary is too large to dynamically update."); return false; } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); @@ -244,7 +246,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, const int *const word1, const int length1, const int probability) { - if (!mBuffer->isUpdatable()) { + if (!mMmappedBuffer.get()->isUpdatable()) { AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; } @@ -253,12 +255,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int AKLOGE("The dictionary is too large to dynamically update."); return false; } - const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; @@ -278,7 +280,7 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - if (!mBuffer->isUpdatable()) { + if (!mMmappedBuffer.get()->isUpdatable()) { AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); return false; } @@ -287,12 +289,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const AKLOGE("The dictionary is too large to dynamically update."); return false; } - const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; @@ -308,7 +310,7 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const } void DynamicPatriciaTriePolicy::flush(const char *const filePath) { - if (!mBuffer->isUpdatable()) { + if (!mMmappedBuffer.get()->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary."); return; } @@ -318,7 +320,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { } void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { - if (!mBuffer->isUpdatable()) { + if (!mMmappedBuffer.get()->isUpdatable()) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } @@ -334,7 +336,7 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { } bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { - if (!mBuffer->isUpdatable()) { + if (!mMmappedBuffer.get()->isUpdatable()) { AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index be97ee1a5..7a81a9c0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -22,7 +22,9 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -32,32 +34,33 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), - mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), - mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + DynamicPatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer) + : mMmappedBuffer(mmappedBuffer), + mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_3), + mBufferWithExtendableBuffer(mMmappedBuffer.get()->getBuffer() + + mHeaderPolicy.getSize(), mMmappedBuffer.get()->getBufferSize() + - mHeaderPolicy.getSize(), + BufferWithExtendableBuffer + ::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mShortcutListPolicy(&mBufferWithExtendableBuffer), mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), + mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} - ~DynamicPatriciaTriePolicy() { - delete mBuffer; - } - AK_FORCE_INLINE int getRootPosition() const { return 0; } - void createAndGetAllChildNodes(const DicNode *const dicNode, + void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord(const int *const inWord, + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; @@ -108,11 +111,12 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { static const int MAX_DICT_EXTENDED_REGION_SIZE; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; - const MmappedBuffer *const mBuffer; + const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mBufferWithExtendableBuffer; DynamicShortcutListPolicy mShortcutListPolicy; DynamicBigramListPolicy mBigramListPolicy; + DynamicPatriciaTrieNodeReader mNodeReader; int mUnigramCount; int mBigramCount; int mNeedsToDecayForTesting; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index f108c219f..398ff21cf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -14,15 +14,17 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { // To avoid infinite loop caused by invalid or malicious forward links. const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; -const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; +const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; // Visits all PtNodes in post-order depth first manner. @@ -37,22 +39,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa return false; } while (!isEnd()) { + const PtNodeParams ptNodeParams(getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } if (!alreadyVisitedChildren) { - if (mNodeReader.hasChildren()) { + if (ptNodeParams.hasChildren()) { // Move to the first child. - if (!listener->onDescend(mNodeReader.getChildrenPos())) { + if (!listener->onDescend(ptNodeParams.getChildrenPos())) { return false; } pushReadingStateToStack(); - readChildNode(); + readChildNode(ptNodeParams); } else { alreadyVisitedChildren = true; } } else { - if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { + if (!listener->onVisitingPtNode(&ptNodeParams)) { return false; } - readNextSiblingNode(); + readNextSiblingNode(ptNodeParams); if (isEnd()) { // All PtNodes in current linked PtNode arrays have been visited. // Return to the parent. @@ -101,10 +107,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor } pushReadingStateToStack(); while (!isEnd()) { + const PtNodeParams ptNodeParams(getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } if (alreadyVisitedAllPtNodesInArray) { if (alreadyVisitedChildren) { // Move to next sibling PtNode's children. - readNextSiblingNode(); + readNextSiblingNode(ptNodeParams); if (isEnd()) { // Return to the parent PTNode. if (!listener->onAscend()) { @@ -120,13 +130,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor alreadyVisitedChildren = false; } } else { - if (mNodeReader.hasChildren()) { + if (ptNodeParams.hasChildren()) { // Move to the first child. - if (!listener->onDescend(mNodeReader.getChildrenPos())) { + if (!listener->onDescend(ptNodeParams.getChildrenPos())) { return false; } pushReadingStateToStack(); - readChildNode(); + readChildNode(ptNodeParams); // Push state to return the head of PtNode array. pushReadingStateToStack(); alreadyVisitedAllPtNodesInArray = false; @@ -136,10 +146,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor } } } else { - if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { + if (!listener->onVisitingPtNode(&ptNodeParams)) { return false; } - readNextSiblingNode(); + readNextSiblingNode(ptNodeParams); if (isEnd()) { if (!listener->onReadingPtNodeArrayTail()) { return false; @@ -170,35 +180,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { mReadingState.mPos = NOT_A_DICT_POS; return; } - mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; + mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } - mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( - dictBuf, &mReadingState.mPos); + mReadingState.mRemainingPtNodeCountInThisArray = + PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, + &mReadingState.mPos); if (usesAdditionalBuffer) { mReadingState.mPos += mBuffer->getOriginalBufferSize(); } // Count up nodes and node arrays to avoid infinite loop. - mReadingState.mTotalNodeCount += mReadingState.mNodeCount; - mReadingState.mNodeArrayCount++; - if (mReadingState.mNodeCount < 0 - || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP - || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { + mReadingState.mTotalPtNodeIndexInThisArrayChain += + mReadingState.mRemainingPtNodeCountInThisArray; + mReadingState.mPtNodeArrayIndexInThisArrayChain++; + if (mReadingState.mRemainingPtNodeCountInThisArray < 0 + || mReadingState.mTotalPtNodeIndexInThisArrayChain + > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP + || mReadingState.mPtNodeArrayIndexInThisArrayChain + > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", - mReadingState.mNodeCount, mReadingState.mTotalNodeCount, - MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount, - MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); + mReadingState.mRemainingPtNodeCountInThisArray, + mReadingState.mTotalPtNodeIndexInThisArrayChain, + MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, + mReadingState.mPtNodeArrayIndexInThisArrayChain, + MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } - if (mReadingState.mNodeCount == 0) { + if (mReadingState.mRemainingPtNodeCountInThisArray == 0) { // Empty node array. Try following forward link. followForwardLink(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h index a71c06971..1e9218e58 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h @@ -21,9 +21,8 @@ #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" namespace latinime { @@ -35,6 +34,7 @@ class DictionaryShortcutsStructurePolicy; * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop. */ +// TODO: Move to pt_common. class DynamicPatriciaTrieReadingHelper { public: class TraversingEventListener { @@ -51,8 +51,7 @@ class DynamicPatriciaTrieReadingHelper { virtual bool onReadingPtNodeArrayTail() = 0; // Returns whether the event handling was succeeded or not. - virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) = 0; + virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0; protected: TraversingEventListener() {}; @@ -62,10 +61,9 @@ class DynamicPatriciaTrieReadingHelper { }; DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, - const DictionaryBigramsStructurePolicy *const bigramsPolicy, - const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) + const PtNodeReader *const ptNodeReader) : mIsError(false), mReadingState(), mBuffer(buffer), - mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {} + mPtNodeReader(ptNodeReader), mReadingStateStack() {} ~DynamicPatriciaTrieReadingHelper() {} @@ -84,15 +82,12 @@ class DynamicPatriciaTrieReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodeArrayPos; - mReadingState.mPrevTotalCodePointCount = 0; - mReadingState.mTotalNodeCount = 0; - mReadingState.mNodeArrayCount = 0; + mReadingState.mTotalCodePointCountSinceInitialization = 0; + mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextPtNodeArray(); - if (!isEnd()) { - fetchPtNodeInfo(); - } } } @@ -103,94 +98,88 @@ class DynamicPatriciaTrieReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodePos; - mReadingState.mNodeCount = 1; - mReadingState.mPrevTotalCodePointCount = 0; - mReadingState.mTotalNodeCount = 1; - mReadingState.mNodeArrayCount = 1; + mReadingState.mRemainingPtNodeCountInThisArray = 1; + mReadingState.mTotalCodePointCountSinceInitialization = 0; + mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); - fetchPtNodeInfo(); } } - AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const { - return &mNodeReader; + AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const { + if (isEnd()) { + return PtNodeParams(); + } + return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos); } - AK_FORCE_INLINE bool isValidTerminalNode() const { - return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal(); + AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const { + return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal(); } - AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const { - return mMergedNodeCodePoints[index] == codePoint; + AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index, + const int codePoint) const { + return ptNodeParams.getCodePoints()[index] == codePoint; } // Return code point count exclude the last read node's code points. AK_FORCE_INLINE int getPrevTotalCodePointCount() const { - return mReadingState.mPrevTotalCodePointCount; + return mReadingState.mTotalCodePointCountSinceInitialization; } // Return code point count include the last read node's code points. - AK_FORCE_INLINE int getTotalCodePointCount() const { - return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); + AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const { + return mReadingState.mTotalCodePointCountSinceInitialization + + ptNodeParams.getCodePointCount(); } - AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( + AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams, const int index, int *const outCodePoints) const { - const int nodeCodePointCount = mNodeReader.getCodePointCount(); + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); + const int *const nodeCodePoints = ptNodeParams.getCodePoints(); for (int i = 0; i < nodeCodePointCount; ++i) { - outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i]; + outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i]; } } - AK_FORCE_INLINE const int *getMergedNodeCodePoints() const { - return mMergedNodeCodePoints; - } - - AK_FORCE_INLINE void readNextSiblingNode() { - mReadingState.mNodeCount -= 1; - mReadingState.mPos = mNodeReader.getSiblingNodePos(); - if (mReadingState.mNodeCount <= 0) { + AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) { + mReadingState.mRemainingPtNodeCountInThisArray -= 1; + mReadingState.mPos = ptNodeParams.getSiblingNodePos(); + if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { // All nodes in the current node array have been read. followForwardLink(); - if (!isEnd()) { - fetchPtNodeInfo(); - } - } else { - fetchPtNodeInfo(); } } // Read the first child node of the current node. - AK_FORCE_INLINE void readChildNode() { - if (mNodeReader.hasChildren()) { - mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); - mReadingState.mTotalNodeCount = 0; - mReadingState.mNodeArrayCount = 0; - mReadingState.mPos = mNodeReader.getChildrenPos(); + AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) { + if (ptNodeParams.hasChildren()) { + mReadingState.mTotalCodePointCountSinceInitialization += + ptNodeParams.getCodePointCount(); + mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; + mReadingState.mPos = ptNodeParams.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. nextPtNodeArray(); - if (!isEnd()) { - fetchPtNodeInfo(); - } } else { mReadingState.mPos = NOT_A_DICT_POS; } } // Read the parent node of the current node. - AK_FORCE_INLINE void readParentNode() { - if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { - mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); - mReadingState.mTotalNodeCount = 1; - mReadingState.mNodeArrayCount = 1; - mReadingState.mNodeCount = 1; - mReadingState.mPos = mNodeReader.getParentPos(); + AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) { + if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) { + mReadingState.mTotalCodePointCountSinceInitialization += + ptNodeParams.getCodePointCount(); + mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; + mReadingState.mRemainingPtNodeCountInThisArray = 1; + mReadingState.mPos = ptNodeParams.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; - fetchPtNodeInfo(); + mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; } else { mReadingState.mPos = NOT_A_DICT_POS; } @@ -201,13 +190,7 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const { - return mReadingState.mPosOfLastPtNodeArrayHead; - } - - AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { - if (!isEnd()) { - fetchPtNodeInfo(); - } + return mReadingState.mPosOfThisPtNodeArrayHead; } bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); @@ -218,50 +201,45 @@ class DynamicPatriciaTrieReadingHelper { private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); - class ReadingState { + // This class encapsulates the reading state of a position in the dictionary. It points at a + // specific PtNode in the dictionary. + class PtNodeReadingState { public: // Note that copy constructor and assignment operator are used for this class to use // std::vector. - ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), - mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), - mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {} + PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0), + mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0), + mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), + mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {} int mPos; - // Node count of a node array. - int mNodeCount; - int mPrevTotalCodePointCount; - int mTotalNodeCount; - int mNodeArrayCount; + // Remaining node count in the current array. + int mRemainingPtNodeCountInThisArray; + int mTotalCodePointCountSinceInitialization; + // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links. + int mTotalPtNodeIndexInThisArrayChain; + // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty + // PtNode arrays. + int mPtNodeArrayIndexInThisArrayChain; int mPosOfLastForwardLinkField; - int mPosOfLastPtNodeArrayHead; + int mPosOfThisPtNodeArrayHead; }; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; + static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; // TODO: Introduce error code to track what caused the error. bool mIsError; - ReadingState mReadingState; + PtNodeReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; - DynamicPatriciaTrieNodeReader mNodeReader; - int mMergedNodeCodePoints[MAX_WORD_LENGTH]; - std::vector<ReadingState> mReadingStateStack; + const PtNodeReader *const mPtNodeReader; + std::vector<PtNodeReadingState> mReadingStateStack; void nextPtNodeArray(); void followForwardLink(); - AK_FORCE_INLINE void fetchPtNodeInfo() { - mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos, - MAX_WORD_LENGTH, mMergedNodeCodePoints); - if (mNodeReader.getCodePointCount() <= 0) { - // Empty node is not allowed. - mIsError = true; - mReadingState.mPos = NOT_A_DICT_POS; - } - } - AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); @@ -279,9 +257,6 @@ class DynamicPatriciaTrieReadingHelper { } else { mReadingState = mReadingStateStack.back(); mReadingStateStack.pop_back(); - if (!isEnd()) { - fetchPtNodeInfo(); - } } } }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp index d68446db6..e94925365 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h index 67c3cc57e..67c3cc57e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index 052558bfc..05caaebac 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -41,24 +41,26 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { + const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); - if (!readingHelper->isMatchedCodePoint(0 /* index */, + if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */, wordCodePoints[matchedCodePointCount])) { // The first code point is different from target code point. Skip this node and read // the next sibling node. - readingHelper->readNextSiblingNode(); + readingHelper->readNextSiblingNode(ptNodeParams); continue; } // Check following merged node code points. - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader(); - const int nodeCodePointCount = nodeReader->getCodePointCount(); + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { const int nextIndex = matchedCodePointCount + j; - if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, + if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; - return reallocatePtNodeAndAddNewPtNodes(nodeReader, - readingHelper->getMergedNodeCodePoints(), j, + return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), wordCodePoints + matchedCodePointCount, @@ -66,20 +68,19 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( } } // All characters are matched. - if (codePointCount == readingHelper->getTotalCodePointCount()) { - return setPtNodeProbability(nodeReader, probability, - readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram); + if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { + return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram); } - if (!nodeReader->hasChildren()) { + if (!ptNodeParams.hasChildren()) { *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, + return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), - wordCodePoints + readingHelper->getTotalCodePointCount(), - codePointCount - readingHelper->getTotalCodePointCount()); + wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), + codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); } // Advance to the children nodes. - parentPos = nodeReader->getHeadPos(); - readingHelper->readChildNode(); + parentPos = ptNodeParams.getHeadPos(); + readingHelper->readChildNode(ptNodeParams); } if (readingHelper->isError()) { // The dictionary is invalid. @@ -95,26 +96,24 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, const int probability, bool *const outAddedNewBigram) { - int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, - mMergedNodeCodePoints); + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); // Move node to add bigram entry. const int newNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { + if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) { return false; } int writingPos = newNodePos; // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(), - mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), - &writingPos)) { + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(), + ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(), + ptNodeParams.getProbability(), &writingPos)) { return false; } - nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); - if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { + const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos)); + if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) { // Insert a new bigram entry into the existing bigram list. - int bigramListPos = nodeReader.getBigramsPos(); + int bigramListPos = newPtNodeParams.getBigramsPos(); return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, outAddedNewBigram); } else { @@ -126,10 +125,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const } // Then, Mark as the PtNode having bigram list in the flags. const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), - nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, - nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, - nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(), + newPtNodeParams.isNotAWord(), + newPtNodeParams.getProbability() != NOT_A_PROBABILITY, + newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); writingPos = newNodePos; // Write updated flags into the moved PtNode's flags field. return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, @@ -140,16 +140,17 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const // Remove a bigram relation from word0Pos to word1Pos. bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos); - if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); + if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) { return false; } - return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); + return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos); } void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + mBuffer->getUsedAdditionalBufferSize(); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, @@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy) { - BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, - MAX_DICTIONARY_SIZE); + BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE); int unigramCount = 0; int bigramCount = 0; if (mNeedsToDecay) { @@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) { return; } - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; @@ -180,8 +181,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod } bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( - const DynamicPatriciaTrieNodeReader *const nodeToUpdate) { - int pos = nodeToUpdate->getHeadPos(); + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -193,16 +194,16 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, true /* isDeleted */); - int writingPos = nodeToUpdate->getHeadPos(); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); // Update flags. return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos); } bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, + const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, const int bigramLinkedNodePos) { - int pos = originalNode->getHeadPos(); + int pos = toBeUpdatedPtNodeParams->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -214,7 +215,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, false /* isDeleted */); - int writingPos = originalNode->getHeadPos(); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); // Update flags. if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos)) { @@ -222,31 +223,32 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( } // Update moved position, which is stored in the parent offset field. if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { + mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { return false; } // Update bigram linked node position, which is stored in the children position field. - int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { return false; } - if (originalNode->hasChildren()) { + if (toBeUpdatedPtNodeParams->hasChildren()) { // Update children's parent position. - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); - readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); + readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); while (!readingHelper.isEnd()) { - int parentOffsetFieldPos = nodeReader->getHeadPos() + const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams()); + int parentOffsetFieldPos = childPtNodeParams.getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(), + mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), &parentOffsetFieldPos)) { // Parent offset cannot be written because of a bug or a broken dictionary; thus, // we give up to update dictionary. return false; } - readingHelper.readNextSiblingNode(); + readingHelper.readNextSiblingNode(childPtNodeParams); } } return true; @@ -332,13 +334,13 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer( bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( BufferWithExtendableBuffer *const bufferToWrite, - const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const PtNodeParams *const originalPtNodeParams, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos) { - return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(), - originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, - originalNode->getChildrenPos(), originalNode->getBigramsPos(), - originalNode->getShortcutPos(), writingPos); + return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(), + originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability, + originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(), + originalPtNodeParams->getShortcutPos(), writingPos); } bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, @@ -354,14 +356,14 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const } bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( - const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, - const int *const codePoints, bool *const outAddedNewUnigram) { - if (originalPtNode->isTerminal()) { + const PtNodeParams *const originalPtNodeParams, const int probability, + bool *const outAddedNewUnigram) { + if (originalPtNodeParams->isTerminal()) { // Overwrites the probability. *outAddedNewUnigram = false; - const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), - probability); - int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); + const int probabilityToWrite = getUpdatedProbability( + originalPtNodeParams->getProbability(), probability); + int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, probabilityToWrite, &probabilityFieldPos)) { return false; @@ -370,11 +372,12 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( // Make the node terminal and write the probability. *outAddedNewUnigram = true; int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { + if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) { return false; } - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, - originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams, + originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(), + originalPtNodeParams->getCodePointCount(), getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &movedPos)) { return false; @@ -384,15 +387,15 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( - const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, + const PtNodeParams *const parentPtNodeParams, const int probability, const int *const codePoints, const int codePointCount) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); - int childrenPosFieldPos = parentNode->getChildrenPosFieldPos(); + int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, newPtNodeArrayPos, &childrenPosFieldPos)) { return false; } - return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, + return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, codePointCount, probability); } @@ -417,8 +420,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( // Returns whether the dictionary updating was succeeded or not. bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( - const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, - const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. @@ -434,8 +436,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; - if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(), - reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, + if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(), + reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability, &writingPos)) { return false; } @@ -448,11 +450,11 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( } // Write the 2nd part of the reallocating node. const int secondPartOfReallocatedPtNodePos = writingPos; - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode, + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, - reallocatingPtNodeCodePoints + overlappingCodePointCount, - reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, - reallocatingPtNode->getProbability(), &writingPos)) { + reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, + reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, + reallocatingPtNodeParams->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { @@ -467,16 +469,17 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } - // Update original reallocatingPtNode as moved. - if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, + // Update original reallocating PtNode as moved. + if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, secondPartOfReallocatedPtNodePos)) { return false; } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos); + const PtNodeParams ptNodeParams( + nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); // Update children position. - int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); + int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, actualChildrenPos, &childrenPosFieldPos)) { return false; @@ -487,7 +490,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount) { - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted @@ -529,9 +533,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, mNeedsToDecay); - // Create reading helper for the GCed dictionary. - DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, + // Create reading node reader and reading helper for the GCed dictionary. + DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); + DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index ca8664729..5614cb3ac 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h @@ -26,11 +26,12 @@ namespace latinime { class BufferWithExtendableBuffer; class DynamicBigramListPolicy; -class DynamicPatriciaTrieNodeReader; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; class HeaderPolicy; +class PtNodeParams; +// TODO: Make it independent from a particular format and move to pt_common. class DynamicPatriciaTrieWritingHelper { public: typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap; @@ -77,12 +78,12 @@ class DynamicPatriciaTrieWritingHelper { // CAVEAT: This method must be called only from inner classes of // DynamicPatriciaTrieGcEventListeners. - bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); + bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); // CAVEAT: This method must be called only from this class or inner classes of // DynamicPatriciaTrieGcEventListeners. bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, - const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const PtNodeParams *const originalPtNodeParams, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); @@ -96,7 +97,7 @@ class DynamicPatriciaTrieWritingHelper { DynamicShortcutListPolicy *const mShortcutPolicy; const bool mNeedsToDecay; - bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, + bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, const int bigramLinkedNodePos); bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, @@ -112,19 +113,17 @@ class DynamicPatriciaTrieWritingHelper { bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); - bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, - const int probability, const int *const codePoints, bool *const outAddedNewUnigram); + bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability, + bool *const outAddedNewUnigram); - bool createChildrenPtNodeArrayAndAChildPtNode( - const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, - const int *const codePoints, const int codePointCount); + bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, + const int probability, const int *const codePoints, const int codePointCount); bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability); bool reallocatePtNodeAndAddNewPtNodes( - const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, - const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp index 30ff10cd6..67733660b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include <cstddef> #include <cstdlib> diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h index af76bc6b5..5654105ee 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h @@ -20,7 +20,7 @@ #include <cstddef> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h new file mode 100644 index 000000000..0c2f47073 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_CONTENT_H +#define LATINIME_DICT_CONTENT_H + +#include "defines.h" + +namespace latinime { + +class DictContent { + public: + virtual ~DictContent() {} + virtual bool isValid() const = 0; + + protected: + DictContent() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictContent); +}; +} // namespace latinime +#endif /* LATINIME_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h new file mode 100644 index 000000000..e415881a9 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SINGLE_DICT_CONTENT_H +#define LATINIME_SINGLE_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { + +class SingleDictContent : public DictContent { + public: + SingleDictContent(const char *const dictDirPath, const char *const contentFileName, + const bool isUpdatable) + : mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)), + mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0, + mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} + + virtual ~SingleDictContent() {} + + virtual bool isValid() const { + return mMmappedBuffer.get() != 0; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent); + + const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; + BufferWithExtendableBuffer mExpandableContentBuffer; +}; +} // namespace latinime +#endif /* LATINIME_SINGLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h new file mode 100644 index 000000000..446b51ed0 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H +#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { + +// TODO: Support multiple contents. +class SparseTableDictContent : public DictContent { + public: + AK_FORCE_INLINE SparseTableDictContent(const char *const dictDirPath, + const char *const lookupTableFileName, const char *const addressTableFileName, + const char *const contentFileName, const bool isUpdatable) + : mLookupTableBuffer( + MmappedBuffer::openBuffer(dictDirPath, lookupTableFileName, isUpdatable)), + mAddressTableBuffer( + MmappedBuffer::openBuffer(dictDirPath, addressTableFileName, isUpdatable)), + mContentBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)), + mExpandableLookupTableBuffer( + mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBuffer() : 0, + mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableAddressTableBuffer( + mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBuffer() : 0, + mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0, + mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} + + virtual ~SparseTableDictContent() {} + + virtual bool isValid() const { + return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 + && mContentBuffer.get() != 0; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); + + // TODO: Have sparse table. + const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer; + const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer; + const MmappedBuffer::MmappedBufferPtr mContentBuffer; + BufferWithExtendableBuffer mExpandableLookupTableBuffer; + BufferWithExtendableBuffer mExpandableAddressTableBuffer; + BufferWithExtendableBuffer mExpandableContentBuffer; +}; +} // namespace latinime +#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h new file mode 100644 index 000000000..1164c408a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_DICT_BUFFER_H +#define LATINIME_VER4_DICT_BUFFER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { + +class Ver4DictBuffers { + public: + typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr; + + static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, + const MmappedBuffer::MmappedBufferPtr &dictBuffer) { + const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false; + return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable)); + } + + AK_FORCE_INLINE bool isValid() const { + return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid() + && mTerminalAddressTable.isValid() && mBigramDictContent.isValid() + && mShortcutDictContent.isValid(); + } + + AK_FORCE_INLINE const uint8_t *getRawDictBuffer() const { + return mDictBuffer.get()->getBuffer(); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); + + AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath, + const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable) + : mDictBuffer(dictBuffer), + mTerminalAddressTable(dictDirPath, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable), + mProbabilityDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, + isUpdatable), + mBigramDictContent(dictDirPath, + Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable), + mShortcutDictContent(dictDirPath, + Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable) {} + + const MmappedBuffer::MmappedBufferPtr mDictBuffer; + SingleDictContent mTerminalAddressTable; + SingleDictContent mProbabilityDictContent; + SparseTableDictContent mBigramDictContent; + SparseTableDictContent mShortcutDictContent; +}; +} // namespace latinime +#endif /* LATINIME_VER4_DICT_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp new file mode 100644 index 000000000..0bfd07b04 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie"; +const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq"; +// tat = Terminal Address Table +const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; +const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq"; +const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup"; +const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq"; +const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut"; +const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup"; +const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = + ".shortcut_index_shortcut"; + +const int Ver4DictConstants::NOT_A_TERMINAL = -1; + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h new file mode 100644 index 000000000..6498ce428 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_DICT_CONSTANTS_H +#define LATINIME_VER4_DICT_CONSTANTS_H + +#include "defines.h" + +namespace latinime { + +// Note that there are corresponding definitions in FormatSpec.java. +class Ver4DictConstants { + public: + static const char *const TRIE_FILE_EXTENSION; + static const char *const FREQ_FILE_EXTENSION; + static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION; + static const char *const BIGRAM_FILE_EXTENSION; + static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION; + static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION; + static const char *const SHORTCUT_FILE_EXTENSION; + static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; + static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; + + static const int NOT_A_TERMINAL; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); +}; +} // namespace latinime +#endif /* LATINIME_VER4_DICT_CONSTANTS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp new file mode 100644 index 000000000..b9ee4891c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" + +namespace latinime { + +void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const { + // TODO: Implement. +} + +int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const { + // TODO: Implement. + return 0; +} + +int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + // TODO: Implement. + return NOT_A_PROBABILITY; +} + +int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_PROBABILITY; +} + +int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, + const int probability) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1, const int probability) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1) { + // TODO: Implement. + return false; +} + +void Ver4PatriciaTriePolicy::flush(const char *const filePath) { + // TODO: Implement. +} + +void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { + // TODO: Implement. +} + +bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { + // TODO: Implement. + return false; +} + +void Ver4PatriciaTriePolicy::getProperty(const char *const query, char *const outResult, + const int maxResultLength) { + // TODO: Implement. +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h new file mode 100644 index 000000000..f7bfb3b0d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H +#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +class DicNode; +class DicNodeVector; + +// TODO: Implement. +class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { + public: + Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) + : mBuffers(buffers), + mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4) {}; + + AK_FORCE_INLINE int getRootPosition() const { + return 0; + } + + void createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const; + + int getCodePointsAndProbabilityAndReturnCodePointCount( + const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const; + + int getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const; + + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + + int getShortcutPositionOfPtNode(const int ptNodePos) const; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; + + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return &mHeaderPolicy; + } + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return 0; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return 0; + } + + bool addUnigramWord(const int *const word, const int length, const int probability); + + bool addBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1, const int probability); + + bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1); + + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC(const bool mindsBlockByGC) const; + + void getProperty(const char *const query, char *const outResult, + const int maxResultLength); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); + + const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; + const HeaderPolicy mHeaderPolicy; +}; +} // namespace latinime +#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index f692882f2..5032131ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -18,7 +18,7 @@ namespace latinime { -const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; +const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90; // TODO: Needs to allocate larger memory corresponding to the current vector size. const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 9dc34823c..1e27a1bec 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -32,12 +32,20 @@ namespace latinime { // raw pointer but provides several methods that handle boundary checking for writing data. class BufferWithExtendableBuffer { public: + static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE; + BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, - const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE) + const int maxAdditionalBufferSize) : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} + // Without original buffer. + BufferWithExtendableBuffer(const int maxAdditionalBufferSize) + : mOriginalBuffer(0), mOriginalBufferSize(0), + mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), + mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} + AK_FORCE_INLINE int getTailPosition() const { return mOriginalBufferSize + mUsedAdditionalBufferSize; } @@ -86,7 +94,6 @@ class BufferWithExtendableBuffer { private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); - static const size_t MAX_ADDITIONAL_BUFFER_SIZE; static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 994826fa8..b48e5b005 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -20,7 +20,7 @@ #include <cstring> #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" @@ -33,6 +33,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = switch (dictVersion) { case 3: return createEmptyV3DictFile(filePath, attributeMap); + case 4: + // TODO: Support version 4 dictionary format. + return false; default: // Only version 3 dictionary is supported for now. return false; @@ -41,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); - BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer bodyBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h new file mode 100644 index 000000000..59b894fa6 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_FILE_UTILS_H +#define LATINIME_FILE_UTILS_H + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "defines.h" + +namespace latinime { + +class FileUtils { + public: + // Returns -1 on error. + static int getFileSize(const char *const filePath) { + const int fd = open(filePath, O_RDONLY); + if (fd == -1) { + return -1; + } + struct stat statBuf; + if (fstat(fd, &statBuf) != 0) { + close(fd); + return -1; + } + close(fd); + return static_cast<int>(statBuf.st_size); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils); +}; +} // namespace latinime +#endif /* LATINIME_FILE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index 1d77d5c27..4843650ad 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -45,6 +45,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; return VERSION_2; } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { return VERSION_3; + } else if (ByteArrayUtils::readUint16(dict, 4) == 4) { + return VERSION_4; } else { return UNKNOWN_VERSION; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 79ed0de29..b90393a53 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -31,6 +31,7 @@ class FormatUtils { enum FORMAT_VERSION { VERSION_2, VERSION_3, + VERSION_4, UNKNOWN_VERSION }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp new file mode 100644 index 000000000..71f863290 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +#include <cerrno> +#include <climits> +#include <cstdio> +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> + +#include "suggest/policyimpl/dictionary/utils/file_utils.h" + +namespace latinime { + +/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer( + const char *const path, const int bufferOffset, const int bufferSize, + const bool isUpdatable) { + const int openMode = isUpdatable ? O_RDWR : O_RDONLY; + const int mmapFd = open(path, openMode); + if (mmapFd < 0) { + AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno); + return MmappedBufferPtr(0); + } + const int pagesize = getpagesize(); + const int offset = bufferOffset % pagesize; + int alignedOffset = bufferOffset - offset; + int alignedSize = bufferSize + offset; + const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ; + void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd, + alignedOffset); + if (mmappedBuffer == MAP_FAILED) { + AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); + close(mmapFd); + return MmappedBufferPtr(0); + } + uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset; + if (!buffer) { + AKLOGE("DICT: buffer is null"); + close(mmapFd); + return MmappedBufferPtr(0); + } + return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, + mmapFd, isUpdatable)); +} + +/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer( + const char *const path, const bool isUpdatable) { + const int fileSize = FileUtils::getFileSize(path); + if (fileSize == -1) { + return MmappedBufferPtr(0); + } else if (fileSize == 0) { + return MmappedBufferPtr(new MmappedBuffer(isUpdatable)); + } else { + return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable); + } +} + +/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer( + const char *const dirPath, const char *const fileName, const bool isUpdatable) { + const int filePathBufferSize = PATH_MAX + 1 /* terminator */; + char filePath[filePathBufferSize]; + const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath, + fileName); + if (filePathLength >= filePathBufferSize) { + return 0; + } + return openBuffer(filePath, isUpdatable); +} + +MmappedBuffer::~MmappedBuffer() { + if (mAlignedSize == 0) { + return; + } + int ret = munmap(mMmappedBuffer, mAlignedSize); + if (ret != 0) { + AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); + } + ret = close(mMmapFd); + if (ret != 0) { + AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno); + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 6b69116eb..73a733b0c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -17,58 +17,27 @@ #ifndef LATINIME_MMAPPED_BUFFER_H #define LATINIME_MMAPPED_BUFFER_H -#include <cerrno> -#include <fcntl.h> #include <stdint.h> -#include <sys/mman.h> -#include <unistd.h> #include "defines.h" +#include "utils/exclusive_ownership_pointer.h" namespace latinime { class MmappedBuffer { public: - static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset, - const int bufferSize, const bool isUpdatable) { - const int openMode = isUpdatable ? O_RDWR : O_RDONLY; - const int mmapFd = open(path, openMode); - if (mmapFd < 0) { - AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno); - return 0; - } - const int pagesize = getpagesize(); - const int offset = bufferOffset % pagesize; - int alignedOffset = bufferOffset - offset; - int alignedSize = bufferSize + offset; - const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ; - void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd, - alignedOffset); - if (mmappedBuffer == MAP_FAILED) { - AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); - close(mmapFd); - return 0; - } - uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset; - if (!buffer) { - AKLOGE("DICT: buffer is null"); - close(mmapFd); - return 0; - } - return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd, - isUpdatable); - } + typedef ExclusiveOwnershipPointer<MmappedBuffer> MmappedBufferPtr; - ~MmappedBuffer() { - int ret = munmap(mMmappedBuffer, mAlignedSize); - if (ret != 0) { - AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); - } - ret = close(mMmapFd); - if (ret != 0) { - AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno); - } - } + static MmappedBufferPtr openBuffer(const char *const path, + const int bufferOffset, const int bufferSize, const bool isUpdatable); + + // Mmap entire file. + static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable); + + static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName, + const bool isUpdatable); + + ~MmappedBuffer(); AK_FORCE_INLINE uint8_t *getBuffer() const { return mBuffer; @@ -89,6 +58,11 @@ class MmappedBuffer { : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer), mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {} + // Empty file. We have to handle an empty file as a valid part of a dictionary. + AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable) + : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0), + mIsUpdatable(isUpdatable) {} + DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer); uint8_t *const mBuffer; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 007c19e0a..fd0ac9eb6 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -81,7 +81,7 @@ class TypingTraversal : public Traversal { return false; } const int point0Index = dicNode->getInputIndex(0); - return dicNode->isTerminalWordNode() + return dicNode->isTerminalDicNode() && traverseSession->getProximityInfoState(0)-> hasSpaceProximity(point0Index); } @@ -96,7 +96,7 @@ class TypingTraversal : public Traversal { if (dicNode->isCompletion(inputSize)) { return false; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return false; } const int16_t pointIndex = dicNode->getInputIndex(0); diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp index 5b6b5e874..54f65c786 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp @@ -23,39 +23,64 @@ namespace latinime { const TypingWeighting TypingWeighting::sInstance; -ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType, +ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const { switch (correctionType) { case CT_MATCH: if (isProximityDicNode(traverseSession, dicNode)) { - return ET_PROXIMITY_CORRECTION; + return ErrorTypeUtils::PROXIMITY_CORRECTION; + } else if (dicNode->isInDigraph()) { + return ErrorTypeUtils::MATCH_WITH_DIGRAPH; } else { - return ET_NOT_AN_ERROR; + // Compare the node code point with original primary code point on the keyboard. + const ProximityInfoState *const pInfoState = + traverseSession->getProximityInfoState(0); + const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt( + dicNode->getInputIndex(0)); + const int nodeCodePoint = dicNode->getNodeCodePoint(); + if (primaryOriginalCodePoint == nodeCodePoint) { + // Node code point is same as original code point on the keyboard. + return ErrorTypeUtils::NOT_AN_ERROR; + } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) == + CharUtils::toLowerCase(nodeCodePoint)) { + // Only cases of the code points are different. + return ErrorTypeUtils::MATCH_WITH_CASE_ERROR; + } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) == + CharUtils::toBaseCodePoint(nodeCodePoint)) { + // Node code point is a variant of original code point. + return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR; + } else { + // Node code point is a variant of original code point and the cases are also + // different. + return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR + | ErrorTypeUtils::MATCH_WITH_CASE_ERROR; + } } + break; case CT_ADDITIONAL_PROXIMITY: - return ET_PROXIMITY_CORRECTION; + return ErrorTypeUtils::PROXIMITY_CORRECTION; case CT_OMISSION: if (parentDicNode->canBeIntentionalOmission()) { - return ET_INTENTIONAL_OMISSION; + return ErrorTypeUtils::INTENTIONAL_OMISSION; } else { - return ET_EDIT_CORRECTION; + return ErrorTypeUtils::EDIT_CORRECTION; } break; case CT_SUBSTITUTION: case CT_INSERTION: case CT_TERMINAL_INSERTION: case CT_TRANSPOSITION: - return ET_EDIT_CORRECTION; + return ErrorTypeUtils::EDIT_CORRECTION; case CT_NEW_WORD_SPACE_OMISSION: case CT_NEW_WORD_SPACE_SUBSTITUTION: - return ET_NEW_WORD; + return ErrorTypeUtils::NEW_WORD; case CT_TERMINAL: - return ET_NOT_AN_ERROR; + return ErrorTypeUtils::NOT_AN_ERROR; case CT_COMPLETION: - return ET_COMPLETION; + return ErrorTypeUtils::COMPLETION; default: - return ET_NOT_AN_ERROR; + return ErrorTypeUtils::NOT_AN_ERROR; } } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 9f0a331e3..41314ef52 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/layout/touch_position_correction_utils.h" #include "suggest/core/policy/weighting.h" #include "suggest/core/session/dic_traverse_session.h" @@ -204,7 +205,7 @@ class TypingWeighting : public Weighting { return cost * traverseSession->getMultiWordCostMultiplier(); } - ErrorType getErrorType(const CorrectionType correctionType, + ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const; diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h new file mode 100644 index 000000000..3cf78954a --- /dev/null +++ b/native/jni/src/utils/exclusive_ownership_pointer.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H +#define LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H + +#include "defines.h" + +namespace latinime { + +template<class T> +class ExclusiveOwnershipPointer { + public: + // This instance become an owner of the raw pointer. + ExclusiveOwnershipPointer(T *const rawPointer) + : mPointer(rawPointer), + mSharedOwnerPtr(new (ExclusiveOwnershipPointer<T> *)(this)) {} + + // Move the ownership. + ExclusiveOwnershipPointer(const ExclusiveOwnershipPointer<T> &pointer) + : mPointer(pointer.mPointer), mSharedOwnerPtr(pointer.mSharedOwnerPtr) { + transferOwnership(&pointer); + } + + ~ExclusiveOwnershipPointer() { + deletePointersIfHavingOwnership(); + } + + // Move the ownership. + ExclusiveOwnershipPointer<T> &operator=(const ExclusiveOwnershipPointer<T> &pointer) { + // Delete pointers when this is an owner of another pointer. + deletePointersIfHavingOwnership(); + mPointer = pointer.mPointer; + mSharedOwnerPtr = pointer.mSharedOwnerPtr; + transferOwnership(pointer); + return *this; + } + + T *get() const { + return mPointer; + } + + private: + // This class allows to copy and assign and ensures only one instance has the ownership of the + // managed pointer. + + ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {} + + void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) { + if (*mSharedOwnerPtr != src) { + AKLOGE("Failed to transfer the ownership because src is not the current owner." + "src: %p, owner: %p", src, *mSharedOwnerPtr); + ASSERT(false); + return; + } + // Transfer the ownership from src to this instance. + *mSharedOwnerPtr = this; + } + + void deletePointersIfHavingOwnership() { + if (mSharedOwnerPtr && *mSharedOwnerPtr == this) { + if (mPointer) { + if (DEBUG_DICT) { + AKLOGI("Releasing pointer: %p", mPointer); + } + delete mPointer; + } + delete mSharedOwnerPtr; + } + } + + T *mPointer; + // mSharedOwnerPtr points a shared memory space where the instance which has the ownership is + // stored. + ExclusiveOwnershipPointer<T> **mSharedOwnerPtr; +}; +} // namespace latinime +#endif /* LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H */ |