diff options
Diffstat (limited to 'native')
17 files changed, 452 insertions, 24 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 55a5c06d7..d73862eb6 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -87,7 +87,9 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ ver4_dict_constants.cpp \ - ver4_patricia_trie_policy.cpp) \ + ver4_patricia_trie_node_reader.cpp \ + ver4_patricia_trie_policy.cpp \ + ver4_patricia_trie_reading_utils.cpp ) \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 7bdd829cd..edc7b954c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -32,7 +32,7 @@ class PtNodeParams { // Invalid PtNode. PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), - mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), @@ -53,6 +53,7 @@ class PtNodeParams { memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); } + // PtNode without terminal id. PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const int codePointCount, const int *const codePoints, const int probabilityFieldPos, const int probability, const int childrenPosFieldPos, @@ -60,7 +61,8 @@ class PtNodeParams { const int bigramPos, const int siblingPos) : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(), - mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), + mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(probabilityFieldPos), mProbability(probability), mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos), @@ -68,6 +70,22 @@ class PtNodeParams { memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); } + // PtNode with a terminal id. + PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, + const int parentPos, const int codePointCount, const int *const codePoints, + const int terminalIdFieldPos, const int terminalId, const int probability, + const int childrenPosFieldPos, const int childrenPos, const int bigramLinkedNodePos, + const int siblingPos) + : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), + mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), + mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(terminalId), + mBigramPos(terminalId), mSiblingPos(siblingPos) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + AK_FORCE_INLINE bool isValid() const { return mCodePointCount > 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h index b5abffeda..ef0067b48 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h @@ -55,7 +55,7 @@ class DynamicPatriciaTrieNodeReader : public PtNodeReader { const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h new file mode 100644 index 000000000..daaf08f61 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H +#define LATINIME_PROBABILITY_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +class ProbabilityDictContent : public SingleDictContent { + public: + ProbabilityDictContent(const char *const dictDirPath, const bool isUpdatable) + : SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, + isUpdatable) {} + + int getProbability(const int terminalId) const { + if (terminalId < 0 || terminalId >= getSize()) { + return NOT_A_PROBABILITY; + } + return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent); + + int getSize() const { + return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE + + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE); + } +}; +} // namespace latinime +#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h index e415881a9..4cb96da6a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h @@ -39,6 +39,15 @@ class SingleDictContent : public DictContent { return mMmappedBuffer.get() != 0; } + protected: + BufferWithExtendableBuffer *getWritableBuffer() { + return &mExpandableContentBuffer; + } + + const BufferWithExtendableBuffer *getBuffer() const { + return &mExpandableContentBuffer; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 1164c408a..333a7c209 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -18,6 +18,7 @@ #define LATINIME_VER4_DICT_BUFFER_H #include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -42,10 +43,18 @@ class Ver4DictBuffers { && mShortcutDictContent.isValid(); } - AK_FORCE_INLINE const uint8_t *getRawDictBuffer() const { + AK_FORCE_INLINE uint8_t *getRawDictBuffer() const { return mDictBuffer.get()->getBuffer(); } + AK_FORCE_INLINE int getRawDictBufferSize() const { + return mDictBuffer.get()->getBufferSize(); + } + + AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { + return &mProbabilityDictContent; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); @@ -54,8 +63,7 @@ class Ver4DictBuffers { : mDictBuffer(dictBuffer), mTerminalAddressTable(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable), - mProbabilityDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, - isUpdatable), + mProbabilityDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, @@ -67,7 +75,7 @@ class Ver4DictBuffers { const MmappedBuffer::MmappedBufferPtr mDictBuffer; SingleDictContent mTerminalAddressTable; - SingleDictContent mProbabilityDictContent; + ProbabilityDictContent mProbabilityDictContent; SparseTableDictContent mBigramDictContent; SparseTableDictContent mShortcutDictContent; }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 0bfd07b04..3e21399fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -30,6 +30,8 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = ".shortcut_index_shortcut"; -const int Ver4DictConstants::NOT_A_TERMINAL = -1; +const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; +const int Ver4DictConstants::PROBABILITY_SIZE = 1; +const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 6498ce428..47a6990f8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -34,7 +34,9 @@ class Ver4DictConstants { static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; - static const int NOT_A_TERMINAL; + static const int NOT_A_TERMINAL_ID; + static const int PROBABILITY_SIZE; + static const int FLAGS_IN_PROBABILITY_FILE_SIZE; private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..b18bdc943 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" + +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( + const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const { + if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mBuffer->getTailPosition()); + ASSERT(false); + return PtNodeParams(); + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int pos = ptNodePos; + const int headPos = ptNodePos; + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const int parentPosOffset = + DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( + dictBuf, &pos); + const int parentPos = + DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos); + int codePoints[MAX_WORD_LENGTH]; + const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); + int terminalIdFieldPos = NOT_A_DICT_POS; + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + int probability = NOT_A_PROBABILITY; + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + terminalIdFieldPos = pos; + if (usesAdditionalBuffer) { + terminalIdFieldPos += mBuffer->getOriginalBufferSize(); + } + terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); + probability = mProbabilityDictContent->getProbability(terminalId); + } + int childrenPosFieldPos = pos; + if (usesAdditionalBuffer) { + childrenPosFieldPos += mBuffer->getOriginalBufferSize(); + } + int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + dictBuf, &pos); + if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) { + childrenPos += mBuffer->getOriginalBufferSize(); + } + int newBigramLinkedNodePos = bigramLinkedNodePos; + if (siblingNodePos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + newBigramLinkedNodePos = childrenPos; + } + } + if (usesAdditionalBuffer) { + pos += mBuffer->getOriginalBufferSize(); + } + // Sibling position is the tail position of original PtNode. + int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos; + // Read destination node if the read node is a moved node. + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + // The destination position is stored at the same place as the parent position. + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos, + newBigramLinkedNodePos); + } else { + return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, + terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos, + newBigramLinkedNodePos, newSiblingNodePos); + } +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h new file mode 100644 index 000000000..bdae0de7e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class ProbabilityDictContent; + +/* + * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved + * node and reads node attributes including probability form probabilityBuffer. + */ +class Ver4PatriciaTrieNodeReader : public PtNodeReader { + public: + Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, + const ProbabilityDictContent *const probabilityDictContent) + : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {} + + ~Ver4PatriciaTrieNodeReader() {} + + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const { + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, + NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */); + } + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); + + const BufferWithExtendableBuffer *const mBuffer; + const ProbabilityDictContent *const mProbabilityDictContent; + + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const int siblingNodePos, const int bigramLinkedNodePos) const; +}; +} // namespace latinime +#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index b9ee4891c..33f738413 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -16,45 +16,110 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + namespace latinime { void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { - // TODO: Implement. + if (!dicNode->hasChildren()) { + return; + } + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); + while (!readingHelper.isEnd()) { + const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); + if (!ptNodeParams.isValid()) { + break; + } + bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); + if (isTerminal && mHeaderPolicy.isDecayingDict()) { + // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose + // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a + // valid terminal DicNode. + isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY) + != NOT_A_PROBABILITY; + } + childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.hasChildren(), + ptNodeParams.isBlacklisted() + || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, + ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + readingHelper.readNextSiblingNode(ptNodeParams); + } } int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - // TODO: Implement. - return 0; + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodePos(ptNodePos); + return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( + maxCodePointCount, outCodePoints, outUnigramProbability); } int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - // TODO: Implement. - return NOT_A_DICT_POS; + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { - // TODO: Implement. - return NOT_A_PROBABILITY; + if (mHeaderPolicy.isDecayingDict()) { + // Both probabilities are encoded. Decode them and get probability. + return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); + } else { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + // bigramProbability is a bigram probability delta. + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } + } } int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { - // TODO: Implement. - return NOT_A_PROBABILITY; + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_PROBABILITY; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { + return NOT_A_PROBABILITY; + } + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { - // TODO: Implement. - return NOT_A_DICT_POS; + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { + return NOT_A_DICT_POS; + } + return ptNodeParams.getTerminalId(); } int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { - // TODO: Implement. - return NOT_A_DICT_POS; + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { + return NOT_A_DICT_POS; + } + return ptNodeParams.getTerminalId(); } bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index f7bfb3b0d..2f577f741 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -21,6 +21,7 @@ #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -33,7 +34,11 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) : mBuffers(buffers), - mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4) {}; + mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4), + mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), + mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -91,6 +96,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; + BufferWithExtendableBuffer mDictBuffer; + Ver4PatriciaTrieNodeReader mNodeReader; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp new file mode 100644 index 000000000..fd8a3ba55 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + +namespace latinime { + +/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition( + const uint8_t *const buffer, int *pos) { + return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos); +} + +/* static */ int Ver4PatriciaTrieReadingUtils::getProbability( + const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) { + int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE + + Ver4DictConstants::PROBABILITY_SIZE) + + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE; + return probabilityBuffer->readUintAndAdvancePosition(Ver4DictConstants::PROBABILITY_SIZE, &pos); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h new file mode 100644 index 000000000..19e6dd331 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H +#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H + +#include <stdint.h> + +#include "defines.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class Ver4PatriciaTrieReadingUtils { + public: + static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer, + int *const pos); + + static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer, + const int terminalId); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils); +}; +} // namespace latinime +#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 5032131ab..e028de526 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -23,6 +23,20 @@ const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 9 // TODO: Needs to allocate larger memory corresponding to the current vector size. const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; +uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size, + int *const pos) const { + const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos); + if (readingPosIsInAdditionalBuffer) { + *pos -= mOriginalBufferSize; + } + const int value = ByteArrayUtils::readUintAndAdvancePosition( + getBuffer(readingPosIsInAdditionalBuffer), size, pos); + if (readingPosIsInAdditionalBuffer) { + *pos += mOriginalBufferSize; + } + return value; +} + bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { if (!(size >= 1 && size <= 4)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 1e27a1bec..2d89f71b1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -71,6 +71,8 @@ class BufferWithExtendableBuffer { } } + uint32_t readUintAndAdvancePosition(const int size, int *const pos) const; + AK_FORCE_INLINE int getOriginalBufferSize() const { return mOriginalBufferSize; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index 0c1576818..1ca01b868 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -114,6 +114,24 @@ class ByteArrayUtils { return buffer[(*pos)++]; } + static AK_FORCE_INLINE int readUintAndAdvancePosition(const uint8_t *const buffer, + const int size, int *const pos) { + // size must be in 1 to 4. + ASSERT(size >= 1 && size <= 4); + switch (size) { + case 1: + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + case 2: + return ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); + case 3: + return ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); + case 4: + return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos); + default: + return 0; + } + } + /** * Code Point Reading * |