about summary refs log tree commit diff stats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp | 48
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h | 12
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h | 45
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h | 19
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp | 172
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h | 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp | 30
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h | 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp | 52
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h | 50
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp | 54
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h | 44
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h | 13
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp | 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h | 7
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp | 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp | 79
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h | 42
18 files changed, 495 insertions, 215 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
index 824d442e4..086d98b4a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
@@ -16,9 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
#include "utils/char_utils.h"
namespace latinime {
@@ -266,27 +264,17 @@ int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inW
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPtReadingHelper::nextPtNodeArray() {
- if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of a bug or a broken dictionary.
- AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
- mReadingState.mPos, mBuffer->getTailPosition());
- ASSERT(false);
+ int ptNodeCountInArray = 0;
+ int firstPtNodePos = NOT_A_DICT_POS;
+ if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid(
+ mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) {
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- mReadingState.mPos -= mBuffer->getOriginalBufferSize();
- }
- mReadingState.mRemainingPtNodeCountInThisArray =
- PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
- &mReadingState.mPos);
- if (usesAdditionalBuffer) {
- mReadingState.mPos += mBuffer->getOriginalBufferSize();
- }
+ mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray;
+ mReadingState.mPos = firstPtNodePos;
// Count up nodes and node arrays to avoid infinite loop.
mReadingState.mTotalPtNodeIndexInThisArrayChain +=
mReadingState.mRemainingPtNodeCountInThisArray;
@@ -317,29 +305,17 @@ void DynamicPtReadingHelper::nextPtNodeArray() {
// Follow the forward link and read the next node array if exists.
void DynamicPtReadingHelper::followForwardLink() {
- if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
- mReadingState.mPos, mBuffer->getTailPosition());
- ASSERT(false);
+ int nextPtNodeArrayPos = NOT_A_DICT_POS;
+ if (!mPtNodeArrayReader->readForwardLinkAndReturnIfValid(
+ mReadingState.mPos, &nextPtNodeArrayPos)) {
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- mReadingState.mPos -= mBuffer->getOriginalBufferSize();
- }
- const int forwardLinkPosition =
- DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
- if (usesAdditionalBuffer) {
- mReadingState.mPos += mBuffer->getOriginalBufferSize();
- }
mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
- if (DynamicPtReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
+ if (nextPtNodeArrayPos != NOT_A_DICT_POS) {
// Follow the forward link.
- mReadingState.mPos += forwardLinkPosition;
+ mReadingState.mPos = nextPtNodeArrayPos;
nextPtNodeArray();
} else {
// All node arrays have been read.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
index bcc5c7857..cc7b5ff70 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -26,9 +26,9 @@
namespace latinime {
-class BufferWithExtendableBuffer;
class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy;
+class PtNodeArrayReader;
/*
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
@@ -74,10 +74,10 @@ class DynamicPtReadingHelper {
std::vector<int> *const mTerminalPositions;
};
- DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer,
- const PtNodeReader *const ptNodeReader)
- : mIsError(false), mReadingState(), mBuffer(buffer),
- mPtNodeReader(ptNodeReader), mReadingStateStack() {}
+ DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader,
+ const PtNodeArrayReader *const ptNodeArrayReader)
+ : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader),
+ mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {}
~DynamicPtReadingHelper() {}
@@ -252,8 +252,8 @@ class DynamicPtReadingHelper {
// TODO: Introduce error code to track what caused the error.
bool mIsError;
PtNodeReadingState mReadingState;
- const BufferWithExtendableBuffer *const mBuffer;
const PtNodeReader *const mPtNodeReader;
+ const PtNodeArrayReader *const mPtNodeArrayReader;
std::vector<PtNodeReadingState> mReadingStateStack;
void nextPtNodeArray();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h
new file mode 100644
index 000000000..6078d8285
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_ARRAY_READER_H
+#define LATINIME_PT_NODE_ARRAY_READER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// Interface class used to read PtNode array information.
+class PtNodeArrayReader {
+ public:
+ virtual ~PtNodeArrayReader() {}
+
+ // Returns whether the position is valid.
+ virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const = 0;
+
+ // Returns whether the position is valid. outNextPtNodeArrayPos is set to NOT_A_DICT_POS when
+ // the next array doesn't exist.
+ virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+ int *const outNextPtNodeArrayPos) const = 0;
+
+ protected:
+ PtNodeArrayReader() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeArrayReader);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 84731eb17..faaf44162 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -53,6 +53,21 @@ class PtNodeParams {
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
}
+ // PtNode read from version 2 dictionary.
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int codePointCount, const int *const codePoints, const int probability,
+ const int childrenPos, const int shortcutPos, const int bigramPos,
+ const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS),
+ mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
+ mBigramPos(bigramPos), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
// PtNode with a terminal id.
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
const int parentPos, const int codePointCount, const int *const codePoints,
@@ -205,9 +220,7 @@ class PtNodeParams {
private:
// This class has a public copy constructor to be used as a return value.
-
- // Disallowing the assignment operator.
- PtNodeParams &operator=(PtNodeParams &ptNodeParams);
+ DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams);
const int mHeadPos;
const PatriciaTrieReadingUtils::NodeFlags mFlags;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 960c1b936..8172e70b6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- int pos = getRootPosition();
- int wordPos = 0;
-
- while (true) {
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- if (wordPos >= length) return NOT_A_DICT_POS;
- int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
- &pos);
- const int wChar = forceLowerCaseSearch
- ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
- while (true) {
- // If there are no more PtNodes in this array, it means we could not
- // find a matching character for this depth, therefore there is no match.
- if (0 >= ptNodeCount) return NOT_A_DICT_POS;
- const int ptNodePos = pos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
- &pos);
- if (character == wChar) {
- // This is the correct PtNode. Only one PtNode may start with the same char within
- // a PtNode array, so either we found our match in this array, or there is
- // no match and we can return NOT_A_DICT_POS. So we will check all the
- // characters in this PtNode indeed does match.
- if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
- character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
- &pos);
- while (NOT_A_CODE_POINT != character) {
- ++wordPos;
- // If we shoot the length of the word we search for, or if we find a single
- // character that does not match, as explained above, it means the word is
- // not in the dictionary (by virtue of this PtNode being the only one to
- // match the word on the first character, but not matching the whole word).
- if (wordPos >= length) return NOT_A_DICT_POS;
- if (inWord[wordPos] != character) return NOT_A_DICT_POS;
- character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mDictRoot, &pos);
- }
- }
- // If we come here we know that so far, we do match. Either we are on a terminal
- // and we match the length, in which case we found it, or we traverse children.
- // If we don't match the length AND don't have children, then a word in the
- // dictionary fully matches a prefix of the searched word but not the full word.
- ++wordPos;
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- if (wordPos == length) {
- return ptNodePos;
- }
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- return NOT_A_DICT_POS;
- }
- // We have children and we are still shorter than the word we are searching for, so
- // we need to traverse children. Put the pointer on the children position, and
- // break
- pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
- flags, &pos);
- break;
- } else {
- // This PtNode does not match, so skip the remaining part and go to the next.
- if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
- &pos);
- }
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
- flags, &pos);
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- mShortcutListPolicy.skipAllShortcuts(&pos);
- }
- if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- mBigramListPolicy.skipAllBigrams(&pos);
- }
- }
- --ptNodeCount;
- }
- }
+ DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
int PatriciaTriePolicy::getProbability(const int unigramProbability,
@@ -336,99 +257,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
- return NOT_A_PROBABILITY;
- }
- if (PatriciaTrieReadingUtils::isNotAWord(flags)
- || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
+ const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
// If this is not a word, or if it's a blacklisted entry, it should behave as
// having no probability outside of the suggestion process (where it should be used
// for shortcuts).
return NOT_A_PROBABILITY;
}
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
- mDictRoot, &pos), NOT_A_PROBABILITY);
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- return NOT_A_DICT_POS;
- }
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
- }
- return pos;
+ return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
- return NOT_A_DICT_POS;
- }
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- mShortcutListPolicy.skipAllShortcuts(&pos);;
- }
- return pos;
+ return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos();
}
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const int ptNodePos, DicNodeVector *childDicNodes) const {
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ PatriciaTrieReadingUtils::NodeFlags flags;
+ int mergedNodeCodePointCount = 0;
int mergedNodeCodePoints[MAX_WORD_LENGTH];
- const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
- const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
- : NOT_A_PROBABILITY;
- const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- mDictRoot, flags, &pos) : NOT_A_DICT_POS;
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
- }
- if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- getBigramsStructurePolicy()->skipAllBigrams(&pos);
- }
- if (mergedNodeCodePointCount <= 0) {
- AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
- ASSERT(false);
- return pos;
- }
+ int probability = NOT_A_PROBABILITY;
+ int childrenPos = NOT_A_DICT_POS;
+ int shortcutPos = NOT_A_DICT_POS;
+ int bigramPos = NOT_A_DICT_POS;
+ int siblingPos = NOT_A_DICT_POS;
+ PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
+ getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
+ &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
PatriciaTrieReadingUtils::isTerminal(flags),
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
- PatriciaTrieReadingUtils::isBlacklisted(flags) ||
- PatriciaTrieReadingUtils::isNotAWord(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags)
+ || PatriciaTrieReadingUtils::isNotAWord(flags),
mergedNodeCodePointCount, mergedNodeCodePoints);
- return pos;
+ return siblingPos;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 319c81569..1ce7f85d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -24,6 +24,8 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@@ -40,7 +42,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
- mHeaderPolicy.getSize()),
- mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
+ mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
+ mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -143,6 +147,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const int mDictBufferSize;
const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy;
+ const Ver2ParticiaTrieNodeReader mPtNodeReader;
+ const Ver2PtNodeArrayReader mPtNodeArrayReader;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
index 82b3593c8..b4eee5572 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
@@ -17,6 +17,8 @@
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
return base + offset;
}
+/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+ int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+ int *const outBigramPos, int *const outSiblingPos) {
+ int readingPos = ptNodePos;
+ const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
+ *outFlags = flags;
+ *outCodePointCount = getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
+ *outProbability = isTerminal(flags) ?
+ readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
+ *outChildrenPos = hasChildrenInFlags(flags) ?
+ readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS;
+ *outShortcutPos = NOT_A_DICT_POS;
+ if (hasShortcutTargets(flags)) {
+ *outShortcutPos = readingPos;
+ shortcutPolicy->skipAllShortcuts(&readingPos);
+ }
+ *outBigramPos = NOT_A_DICT_POS;
+ if (hasBigrams(flags)) {
+ *outBigramPos = readingPos;
+ bigramPolicy->skipAllBigrams(&readingPos);
+ }
+ *outSiblingPos = readingPos;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
index b28f58336..fa1430ce6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
@@ -23,6 +23,9 @@
namespace latinime {
+class DictionaryShortcutsStructurePolicy;
+class DictionaryBigramsStructurePolicy;
+
// TODO: Move to pt_common
class PatriciaTrieReadingUtils {
public:
@@ -101,6 +104,13 @@ class PatriciaTrieReadingUtils {
return nodeFlags;
}
+ static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+ int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+ int *const outBigramPos, int *const outSiblingPos);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..778d7a408
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+const PtNodeParams Ver2ParticiaTrieNodeReader::fetchNodeInfoInBufferFromPtNodePos(
+ const int ptNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mDictSize) {
+ // Reading invalid position because of a bug or a broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mDictSize);
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ PatriciaTrieReadingUtils::NodeFlags flags;
+ int mergedNodeCodePointCount = 0;
+ int mergedNodeCodePoints[MAX_WORD_LENGTH];
+ int probability = NOT_A_PROBABILITY;
+ int childrenPos = NOT_A_DICT_POS;
+ int shortcutPos = NOT_A_DICT_POS;
+ int bigramPos = NOT_A_DICT_POS;
+ int siblingPos = NOT_A_DICT_POS;
+ PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy,
+ mBigramPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability,
+ &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+ if (mergedNodeCodePointCount <= 0) {
+ AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ return PtNodeParams(ptNodePos, flags, mergedNodeCodePointCount, mergedNodeCodePoints,
+ probability, childrenPos, shortcutPos, bigramPos, siblingPos);
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
new file mode 100644
index 000000000..dd1a0da51
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+class Ver2ParticiaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+ : mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy),
+ mShortuctPolicy(shortcutPolicy) {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader);
+
+ const uint8_t *const mDictBuffer;
+ const int mDictSize;
+ const DictionaryBigramsStructurePolicy *const mBigramPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
new file mode 100644
index 000000000..125ea31dc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const { // Returns false (outputs untouched) for an out-of-range position; otherwise fills both outputs and returns true.
+ if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) { // Only the start position is checked, against [0, mDictSize).
+ // Reading invalid position because of a bug or a broken dictionary. Report it and fail.
+ AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+ ptNodeArrayPos, mDictSize);
+ ASSERT(false); // Presumably active only in debug builds, hence the explicit return below -- confirm the macro.
+ return false;
+ }
+ int readingPos = ptNodeArrayPos; // Local cursor; handed to the helper by pointer.
+ const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictBuffer, &readingPos);
+ *outPtNodeCount = ptNodeCountInArray; // NOTE(review): unlike the Ver4 reader, the count is not checked for a negative value here.
+ *outFirstPtNodePos = readingPos; // The cursor as the helper left it; by the helper's name, just past the array-size field.
+ return true;
+}
+
+bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, // NOTE(review): "forword" is presumably a typo of "forward"; the header declaration uses the same spelling.
+ int *const outNextPtNodeArrayPos) const { // Range-checks the position, then always reports that there is no next PtNode array.
+ if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) {
+ // Reading invalid position because of a bug or a broken dictionary. Report it and fail.
+ AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+ forwordLinkPos, mDictSize);
+ ASSERT(false); // Presumably active only in debug builds, hence the explicit return below -- confirm the macro.
+ return false;
+ }
+ // Ver2 dicts don't have forward links, so the buffer is not read and the output is always NOT_A_DICT_POS.
+ *outNextPtNodeArrayPos = NOT_A_DICT_POS;
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
new file mode 100644
index 000000000..77404adf8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+class Ver2PtNodeArrayReader : public PtNodeArrayReader { // PtNodeArrayReader over a raw byte buffer given as (dictBuffer, dictSize).
+ public:
+ Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
+ : mDictBuffer(dictBuffer), mDictSize(dictSize) {}; // Only stores the arguments. NOTE(review): the ';' after the body is redundant.
+
+ virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, // Definition in the .cpp returns false when ptNodeArrayPos is outside [0, dictSize).
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+ virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, // Definition in the .cpp always outputs NOT_A_DICT_POS for an in-range position.
+ int *const outNextPtNodeArrayPos) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader); // Project macro; by its name it disables copying -- confirm where it is defined.
+
+ const uint8_t *const mDictBuffer; // Stored as passed in; this class declares no destructor, so it is not freed here.
+ const int mDictSize; // Exclusive upper bound used by both range checks in the .cpp.
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 69576d8e5..66845bbd6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -24,13 +24,14 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
namespace latinime {
class BufferWithExtendableBuffer;
class Ver4BigramListPolicy;
class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PtNodeArrayReader;
class Ver4ShortcutListPolicy;
/*
@@ -39,11 +40,12 @@ class Ver4ShortcutListPolicy;
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
- Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+ Ver4DictBuffers *const buffers, const PtNodeReader *const ptNodeReader,
+ const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
- : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
- mReadingHelper(mTrieBuffer, mPtNodeReader),
- mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
+ : mTrieBuffer(trieBuffer), mBuffers(buffers),
+ mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
+ mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@@ -114,7 +116,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers;
- const Ver4PatriciaTrieNodeReader *const mPtNodeReader;
DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 75d85988c..efc29a0c3 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -43,7 +43,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
if (!dicNode->hasChildren()) {
return;
}
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
@@ -70,7 +70,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodePos(ptNodePos);
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability);
@@ -78,7 +78,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
@@ -158,7 +158,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
shortcutLength);
return false;
}
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
@@ -397,7 +397,7 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
&mTerminalPtNodePositionsForIteratingWords);
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 9ba5be0c3..692163058 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@@ -47,8 +48,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
- mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
- &mShortcutPolicy),
+ mPtNodeArrayReader(mDictBuffer),
+ mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader,
+ &mBigramPolicy, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
@@ -132,6 +134,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4BigramListPolicy mBigramPolicy;
Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader;
+ Ver4PtNodeArrayReader mPtNodeArrayReader;
Ver4PatriciaTrieNodeWriter mNodeWriter;
DynamicPtUpdatingHelper mUpdatingHelper;
Ver4PatriciaTrieWritingHelper mWritingHelper;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 672097455..acf099122 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -26,6 +26,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -74,14 +75,15 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent());
+ Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
- mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
+ mBuffers, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy);
- DynamicPtReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
+ DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
@@ -124,7 +126,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
+ buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy);
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@@ -136,12 +138,14 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent());
+ Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
+ buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
+ &newShortcutPolicy);
// Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
@@ -163,8 +167,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
mBuffers->getShortcutDictContent())) {
return false;
}
- DynamicPtReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
- &newPtNodeReader);
+ DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
new file mode 100644
index 000000000..bbdf40cdd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const { // Returns false (outputs untouched) for an out-of-range position or a negative count; otherwise fills both outputs.
+ if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition()) { // Valid positions are [0, getTailPosition()).
+ // Reading invalid position because of a bug or a broken dictionary. Report it and fail.
+ AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+ ptNodeArrayPos, mBuffer->getTailPosition());
+ ASSERT(false); // Presumably active only in debug builds, hence the explicit return below -- confirm the macro.
+ return false;
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); // Picks the underlying buffer according to the flag above.
+ int readingPos = ptNodeArrayPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= mBuffer->getOriginalBufferSize(); // Shift to an offset inside the additional buffer before calling the helper.
+ }
+ const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ dictBuf, &readingPos);
+ if (usesAdditionalBuffer) {
+ readingPos += mBuffer->getOriginalBufferSize(); // Undo the shift so the advanced position is in the caller's coordinates again.
+ }
+ if (ptNodeCountInArray < 0) { // Rejected counts are logged without an ASSERT, unlike the invalid-position path.
+ AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCountInArray);
+ return false;
+ }
+ *outPtNodeCount = ptNodeCountInArray;
+ *outFirstPtNodePos = readingPos; // The cursor after the helper's read, with the buffer shift undone.
+ return true;
+}
+
+bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, // NOTE(review): "forword" is presumably a typo of "forward"; the header declaration uses the same spelling.
+ int *const outNextPtNodeArrayPos) const { // Returns false only for an out-of-range position; an invalid link still returns true with NOT_A_DICT_POS.
+ if (forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition()) { // Valid positions are [0, getTailPosition()).
+ // Reading invalid position because of a bug or a broken dictionary. Report it and fail.
+ AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+ forwordLinkPos, mBuffer->getTailPosition());
+ ASSERT(false); // Presumably active only in debug builds, hence the explicit return below -- confirm the macro.
+ return false;
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); // Picks the underlying buffer according to the flag above.
+ int readingPos = forwordLinkPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= mBuffer->getOriginalBufferSize(); // Shift to an offset inside the additional buffer before calling the helper.
+ }
+ const int nextPtNodeArrayOffset = // Used below as an offset relative to forwordLinkPos.
+ DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, readingPos);
+ if (DynamicPtReadingUtils::isValidForwardLinkPosition(nextPtNodeArrayOffset)) {
+ *outNextPtNodeArrayPos = forwordLinkPos + nextPtNodeArrayOffset; // Added to the unshifted forwordLinkPos, not to readingPos.
+ } else {
+ *outNextPtNodeArrayPos = NOT_A_DICT_POS; // No usable link; the call itself still succeeds.
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
new file mode 100644
index 000000000..d81808efc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER4_PT_NODE_ARRAY_READER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class Ver4PtNodeArrayReader : public PtNodeArrayReader { // PtNodeArrayReader that reads through a BufferWithExtendableBuffer.
+ public:
+ Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {}; // Only stores the pointer. NOTE(review): single-argument constructor is not 'explicit', and the ';' after the body is redundant.
+
+ virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, // Definition in the .cpp returns false for an out-of-range position or a negative PtNode count.
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+ virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, // Definition in the .cpp outputs NOT_A_DICT_POS when the link read is not valid.
+ int *const outNextPtNodeArrayPos) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader); // Project macro; by its name it disables copying -- confirm where it is defined.
+
+ const BufferWithExtendableBuffer *const mBuffer; // Stored as passed in; this class declares no destructor, so it is not freed here.
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PT_NODE_ARRAY_READER_H */