diff options
Diffstat (limited to 'native/jni/src')
18 files changed, 495 insertions, 215 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp index 824d442e4..086d98b4a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp @@ -16,9 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" #include "utils/char_utils.h" namespace latinime { @@ -266,27 +264,17 @@ int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inW // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPtReadingHelper::nextPtNodeArray() { - if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { - // Reading invalid position because of a bug or a broken dictionary. - AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", - mReadingState.mPos, mBuffer->getTailPosition()); - ASSERT(false); + int ptNodeCountInArray = 0; + int firstPtNodePos = NOT_A_DICT_POS; + if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid( + mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) { mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - mReadingState.mPos -= mBuffer->getOriginalBufferSize(); - } - mReadingState.mRemainingPtNodeCountInThisArray = - PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, - &mReadingState.mPos); - if (usesAdditionalBuffer) { - mReadingState.mPos += mBuffer->getOriginalBufferSize(); - } + mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray; + mReadingState.mPos = firstPtNodePos; // Count up nodes and node arrays to avoid infinite loop. mReadingState.mTotalPtNodeIndexInThisArrayChain += mReadingState.mRemainingPtNodeCountInThisArray; @@ -317,29 +305,17 @@ void DynamicPtReadingHelper::nextPtNodeArray() { // Follow the forward link and read the next node array if exists. void DynamicPtReadingHelper::followForwardLink() { - if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { - // Reading invalid position because of bug or broken dictionary. - AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", - mReadingState.mPos, mBuffer->getTailPosition()); - ASSERT(false); + int nextPtNodeArrayPos = NOT_A_DICT_POS; + if (!mPtNodeArrayReader->readForwardLinkAndReturnIfValid( + mReadingState.mPos, &nextPtNodeArrayPos)) { mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - mReadingState.mPos -= mBuffer->getOriginalBufferSize(); - } - const int forwardLinkPosition = - DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos); - if (usesAdditionalBuffer) { - mReadingState.mPos += mBuffer->getOriginalBufferSize(); - } mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos; - if (DynamicPtReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { + if (nextPtNodeArrayPos != NOT_A_DICT_POS) { // Follow the forward link. - mReadingState.mPos += forwardLinkPosition; + mReadingState.mPos = nextPtNodeArrayPos; nextPtNodeArray(); } else { // All node arrays have been read. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h index bcc5c7857..cc7b5ff70 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h @@ -26,9 +26,9 @@ namespace latinime { -class BufferWithExtendableBuffer; class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; +class PtNodeArrayReader; /* * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and @@ -74,10 +74,10 @@ class DynamicPtReadingHelper { std::vector<int> *const mTerminalPositions; }; - DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer, - const PtNodeReader *const ptNodeReader) - : mIsError(false), mReadingState(), mBuffer(buffer), - mPtNodeReader(ptNodeReader), mReadingStateStack() {} + DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader, + const PtNodeArrayReader *const ptNodeArrayReader) + : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader), + mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {} ~DynamicPtReadingHelper() {} @@ -252,8 +252,8 @@ class DynamicPtReadingHelper { // TODO: Introduce error code to track what caused the error. bool mIsError; PtNodeReadingState mReadingState; - const BufferWithExtendableBuffer *const mBuffer; const PtNodeReader *const mPtNodeReader; + const PtNodeArrayReader *const mPtNodeArrayReader; std::vector<PtNodeReadingState> mReadingStateStack; void nextPtNodeArray(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h new file mode 100644 index 000000000..6078d8285 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_ARRAY_READER_H +#define LATINIME_PT_NODE_ARRAY_READER_H + +#include "defines.h" + +namespace latinime { + +// Interface class used to read PtNode array information. +class PtNodeArrayReader { + public: + virtual ~PtNodeArrayReader() {} + + // Returns if the position is valid or not. + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const = 0; + + // Returns if the position is valid or not. NOT_A_DICT_POS is set to outNextPtNodeArrayPos when + // the next array doesn't exist. + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const = 0; + + protected: + PtNodeArrayReader() {}; + + private: + DISALLOW_COPY_AND_ASSIGN(PtNodeArrayReader); +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 84731eb17..faaf44162 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -53,6 +53,21 @@ class PtNodeParams { memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); } + // PtNode read from version 2 dictionary. + PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, + const int codePointCount, const int *const codePoints, const int probability, + const int childrenPos, const int shortcutPos, const int bigramPos, + const int siblingPos) + : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS), + mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), + mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos), + mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos), + mBigramPos(bigramPos), mSiblingPos(siblingPos) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + // PtNode with a terminal id. PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const int codePointCount, const int *const codePoints, @@ -205,9 +220,7 @@ class PtNodeParams { private: // This class have a public copy constructor to be used as a return value. - - // Disallowing the assignment operator. - PtNodeParams &operator=(PtNodeParams &ptNodeParams); + DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams); const int mHeadPos; const PatriciaTrieReadingUtils::NodeFlags mFlags; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 960c1b936..8172e70b6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // dictionary. If no match is found, it returns NOT_A_DICT_POS. int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - int pos = getRootPosition(); - int wordPos = 0; - - while (true) { - // If we already traversed the tree further than the word is long, there means - // there was no match (or we would have found it). - if (wordPos >= length) return NOT_A_DICT_POS; - int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, - &pos); - const int wChar = forceLowerCaseSearch - ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; - while (true) { - // If there are no more PtNodes in this array, it means we could not - // find a matching character for this depth, therefore there is no match. - if (0 >= ptNodeCount) return NOT_A_DICT_POS; - const int ptNodePos = pos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, - &pos); - if (character == wChar) { - // This is the correct PtNode. Only one PtNode may start with the same char within - // a PtNode array, so either we found our match in this array, or there is - // no match and we can return NOT_A_DICT_POS. So we will check all the - // characters in this PtNode indeed does match. - if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { - character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, - &pos); - while (NOT_A_CODE_POINT != character) { - ++wordPos; - // If we shoot the length of the word we search for, or if we find a single - // character that does not match, as explained above, it means the word is - // not in the dictionary (by virtue of this PtNode being the only one to - // match the word on the first character, but not matching the whole word). - if (wordPos >= length) return NOT_A_DICT_POS; - if (inWord[wordPos] != character) return NOT_A_DICT_POS; - character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( - mDictRoot, &pos); - } - } - // If we come here we know that so far, we do match. Either we are on a terminal - // and we match the length, in which case we found it, or we traverse children. - // If we don't match the length AND don't have children, then a word in the - // dictionary fully matches a prefix of the searched word but not the full word. - ++wordPos; - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - if (wordPos == length) { - return ptNodePos; - } - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - return NOT_A_DICT_POS; - } - // We have children and we are still shorter than the word we are searching for, so - // we need to traverse children. Put the pointer on the children position, and - // break - pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, - flags, &pos); - break; - } else { - // This PtNode does not match, so skip the remaining part and go to the next. - if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, - &pos); - } - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, - flags, &pos); - } - if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - mShortcutListPolicy.skipAllShortcuts(&pos); - } - if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - mBigramListPolicy.skipAllBigrams(&pos); - } - } - --ptNodeCount; - } - } + DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int PatriciaTriePolicy::getProbability(const int unigramProbability, @@ -336,99 +257,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - if (!PatriciaTrieReadingUtils::isTerminal(flags)) { - return NOT_A_PROBABILITY; - } - if (PatriciaTrieReadingUtils::isNotAWord(flags) - || PatriciaTrieReadingUtils::isBlacklisted(flags)) { + const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) { // If this is not a word, or if it's a blacklisted entry, it should behave as // having no probability outside of the suggestion process (where it should be used // for shortcuts). return NOT_A_PROBABILITY; } - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition( - mDictRoot, &pos), NOT_A_PROBABILITY); + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - return NOT_A_DICT_POS; - } - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); - } - return pos; + return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos(); } int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { - return NOT_A_DICT_POS; - } - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); - } - if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - mShortcutListPolicy.skipAllShortcuts(&pos);; - } - return pos; + return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos(); } int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *childDicNodes) const { - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); + PatriciaTrieReadingUtils::NodeFlags flags; + int mergedNodeCodePointCount = 0; int mergedNodeCodePoints[MAX_WORD_LENGTH]; - const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); - const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos) - : NOT_A_PROBABILITY; - const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - mDictRoot, flags, &pos) : NOT_A_DICT_POS; - if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - getShortcutsStructurePolicy()->skipAllShortcuts(&pos); - } - if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - getBigramsStructurePolicy()->skipAllBigrams(&pos); - } - if (mergedNodeCodePointCount <= 0) { - AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); - ASSERT(false); - return pos; - } + int probability = NOT_A_PROBABILITY; + int childrenPos = NOT_A_DICT_POS; + int shortcutPos = NOT_A_DICT_POS; + int bigramPos = NOT_A_DICT_POS; + int siblingPos = NOT_A_DICT_POS; + PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), + getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, + &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), - PatriciaTrieReadingUtils::isBlacklisted(flags) || - PatriciaTrieReadingUtils::isNotAWord(flags), + PatriciaTrieReadingUtils::isBlacklisted(flags) + || PatriciaTrieReadingUtils::isNotAWord(flags), mergedNodeCodePointCount, mergedNodeCodePoints); - return pos; + return siblingPos; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 319c81569..1ce7f85d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -24,6 +24,8 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -40,7 +42,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()), mDictBufferSize(mMmappedBuffer.get()->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), + mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), + mPtNodeArrayReader(mDictRoot, mDictBufferSize) {} AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -143,6 +147,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const int mDictBufferSize; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; + const Ver2ParticiaTrieNodeReader mPtNodeReader; + const Ver2PtNodeArrayReader mPtNodeArrayReader; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp index 82b3593c8..b4eee5572 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp @@ -17,6 +17,8 @@ #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; return base + offset; } +/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const DictionaryBigramsStructurePolicy *const bigramPolicy, + NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint, + int *const outProbability, int *const outChildrenPos, int *const outShortcutPos, + int *const outBigramPos, int *const outSiblingPos) { + int readingPos = ptNodePos; + const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos); + *outFlags = flags; + *outCodePointCount = getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos); + *outProbability = isTerminal(flags) ? + readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY; + *outChildrenPos = hasChildrenInFlags(flags) ? + readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS; + *outShortcutPos = NOT_A_DICT_POS; + if (hasShortcutTargets(flags)) { + *outShortcutPos = readingPos; + shortcutPolicy->skipAllShortcuts(&readingPos); + } + *outBigramPos = NOT_A_DICT_POS; + if (hasBigrams(flags)) { + *outBigramPos = readingPos; + bigramPolicy->skipAllBigrams(&readingPos); + } + *outSiblingPos = readingPos; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h index b28f58336..fa1430ce6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h @@ -23,6 +23,9 @@ namespace latinime { +class DictionaryShortcutsStructurePolicy; +class DictionaryBigramsStructurePolicy; + // TODO: Move to pt_common class PatriciaTrieReadingUtils { public: @@ -101,6 +104,13 @@ class PatriciaTrieReadingUtils { return nodeFlags; } + static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const DictionaryBigramsStructurePolicy *const bigramPolicy, + NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint, + int *const outProbability, int *const outChildrenPos, int *const outShortcutPos, + int *const outBigramPos, int *const outSiblingPos); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..778d7a408 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" + +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" + +namespace latinime { + +const PtNodeParams Ver2ParticiaTrieNodeReader::fetchNodeInfoInBufferFromPtNodePos( + const int ptNodePos) const { + if (ptNodePos < 0 || ptNodePos >= mDictSize) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mDictSize); + ASSERT(false); + return PtNodeParams(); + } + PatriciaTrieReadingUtils::NodeFlags flags; + int mergedNodeCodePointCount = 0; + int mergedNodeCodePoints[MAX_WORD_LENGTH]; + int probability = NOT_A_PROBABILITY; + int childrenPos = NOT_A_DICT_POS; + int shortcutPos = NOT_A_DICT_POS; + int bigramPos = NOT_A_DICT_POS; + int siblingPos = NOT_A_DICT_POS; + PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy, + mBigramPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, + &childrenPos, &shortcutPos, &bigramPos, &siblingPos); + if (mergedNodeCodePointCount <= 0) { + AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); + ASSERT(false); + return PtNodeParams(); + } + return PtNodeParams(ptNodePos, flags, mergedNodeCodePointCount, mergedNodeCodePoints, + probability, childrenPos, shortcutPos, bigramPos, siblingPos); +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h new file mode 100644 index 000000000..dd1a0da51 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" + +namespace latinime { + +class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; + +class Ver2ParticiaTrieNodeReader : public PtNodeReader { + public: + Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize, + const DictionaryBigramsStructurePolicy *const bigramPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy) + : mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy), + mShortuctPolicy(shortcutPolicy) {} + + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader); + + const uint8_t *const mDictBuffer; + const int mDictSize; + const DictionaryBigramsStructurePolicy *const mBigramPolicy; + const DictionaryShortcutsStructurePolicy *const mShortuctPolicy; +}; +} // namespace latinime +#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp new file mode 100644 index 000000000..125ea31dc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" + +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" + +namespace latinime { + +bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const { + if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) { + // Reading invalid position because of a bug or a broken dictionary. + AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", + ptNodeArrayPos, mDictSize); + ASSERT(false); + return false; + } + int readingPos = ptNodeArrayPos; + const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + mDictBuffer, &readingPos); + *outPtNodeCount = ptNodeCountInArray; + *outFirstPtNodePos = readingPos; + return true; +} + +bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const { + if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", + forwordLinkPos, mDictSize); + ASSERT(false); + return false; + } + // Ver2 dicts don't have forward links. + *outNextPtNodeArrayPos = NOT_A_DICT_POS; + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h new file mode 100644 index 000000000..77404adf8 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H +#define LATINIME_VER2_PT_NODE_ARRAY_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" + +namespace latinime { + +class Ver2PtNodeArrayReader : public PtNodeArrayReader { + public: + Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize) + : mDictBuffer(dictBuffer), mDictSize(dictSize) {}; + + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const; + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader); + + const uint8_t *const mDictBuffer; + const int mDictSize; +}; +} // namespace latinime +#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index 69576d8e5..66845bbd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -24,13 +24,14 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" namespace latinime { class BufferWithExtendableBuffer; class Ver4BigramListPolicy; class Ver4DictBuffers; +class Ver4PatriciaTrieNodeReader; +class Ver4PtNodeArrayReader; class Ver4ShortcutListPolicy; /* @@ -39,11 +40,12 @@ class Ver4ShortcutListPolicy; class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { public: Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, - Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, + Ver4DictBuffers *const buffers, const PtNodeReader *const ptNodeReader, + const PtNodeArrayReader *const ptNodeArrayReader, Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) - : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), - mReadingHelper(mTrieBuffer, mPtNodeReader), - mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + : mTrieBuffer(trieBuffer), mBuffers(buffers), + mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy), + mShortcutPolicy(shortcutPolicy) {} virtual ~Ver4PatriciaTrieNodeWriter() {} @@ -114,7 +116,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { BufferWithExtendableBuffer *const mTrieBuffer; Ver4DictBuffers *const mBuffers; - const Ver4PatriciaTrieNodeReader *const mPtNodeReader; DynamicPtReadingHelper mReadingHelper; Ver4BigramListPolicy *const mBigramPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 75d85988c..efc29a0c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -43,7 +43,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); while (!readingHelper.isEnd()) { const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); @@ -70,7 +70,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodePos(ptNodePos); return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( maxCodePointCount, outCodePoints, outUnigramProbability); @@ -78,7 +78,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } @@ -158,7 +158,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len shortcutLength); return false; } - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord, @@ -397,7 +397,7 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( &mTerminalPtNodePositionsForIteratingWords); - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 9ba5be0c3..692163058 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -29,6 +29,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -47,8 +48,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), - mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, - &mShortcutPolicy), + mPtNodeArrayReader(mDictBuffer), + mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader, + &mBigramPolicy, &mShortcutPolicy), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), @@ -132,6 +134,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4BigramListPolicy mBigramPolicy; Ver4ShortcutListPolicy mShortcutPolicy; Ver4PatriciaTrieNodeReader mNodeReader; + Ver4PtNodeArrayReader mPtNodeArrayReader; Ver4PatriciaTrieNodeWriter mNodeWriter; DynamicPtUpdatingHelper mUpdatingHelper; Ver4PatriciaTrieWritingHelper mWritingHelper; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 672097455..acf099122 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -26,6 +26,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -74,14 +75,15 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int *const outUnigramCount, int *const outBigramCount) { Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), mBuffers->getProbabilityDictContent()); + Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), mBuffers->getTerminalPositionLookupTable(), headerPolicy); Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), - mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + mBuffers, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); - DynamicPtReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); + DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted @@ -124,7 +126,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); @@ -136,12 +138,14 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instances for the GCed dictionary. Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), buffersToWrite->getProbabilityDictContent()); + Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), buffersToWrite->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy); + buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy, + &newShortcutPolicy); // Re-assign terminal IDs for valid terminal PtNodes. TerminalPositionLookupTable::TerminalIdMap terminalIdMap; if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( @@ -163,8 +167,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, mBuffers->getShortcutDictContent())) { return false; } - DynamicPtReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(), - &newPtNodeReader); + DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp new file mode 100644 index 000000000..bbdf40cdd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const { + if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of a bug or a broken dictionary. + AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", + ptNodeArrayPos, mBuffer->getTailPosition()); + ASSERT(false); + return false; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int readingPos = ptNodeArrayPos; + if (usesAdditionalBuffer) { + readingPos -= mBuffer->getOriginalBufferSize(); + } + const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + dictBuf, &readingPos); + if (usesAdditionalBuffer) { + readingPos += mBuffer->getOriginalBufferSize(); + } + if (ptNodeCountInArray < 0) { + AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCountInArray); + return false; + } + *outPtNodeCount = ptNodeCountInArray; + *outFirstPtNodePos = readingPos; + return true; +} + +bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const { + if (forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", + forwordLinkPos, mBuffer->getTailPosition()); + ASSERT(false); + return false; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int readingPos = forwordLinkPos; + if (usesAdditionalBuffer) { + readingPos -= mBuffer->getOriginalBufferSize(); + } + const int nextPtNodeArrayOffset = + DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, readingPos); + if (DynamicPtReadingUtils::isValidForwardLinkPosition(nextPtNodeArrayOffset)) { + *outNextPtNodeArrayPos = forwordLinkPos + nextPtNodeArrayOffset; + } else { + *outNextPtNodeArrayPos = NOT_A_DICT_POS; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h new file mode 100644 index 000000000..d81808efc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PT_NODE_ARRAY_READER_H +#define LATINIME_VER4_PT_NODE_ARRAY_READER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class Ver4PtNodeArrayReader : public PtNodeArrayReader { + public: + Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {}; + + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const; + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader); + + const BufferWithExtendableBuffer *const mBuffer; +}; +} // namespace latinime +#endif /* LATINIME_VER4_PT_NODE_ARRAY_READER_H */ |