aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h50
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp91
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp38
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h40
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h18
17 files changed, 452 insertions, 24 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 55a5c06d7..d73862eb6 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -87,7 +87,9 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_patricia_trie_writing_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
ver4_dict_constants.cpp \
- ver4_patricia_trie_policy.cpp) \
+ ver4_patricia_trie_node_reader.cpp \
+ ver4_patricia_trie_policy.cpp \
+ ver4_patricia_trie_reading_utils.cpp ) \
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 7bdd829cd..edc7b954c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -32,7 +32,7 @@ class PtNodeParams {
// Invalid PtNode.
PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
- mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
@@ -53,6 +53,7 @@ class PtNodeParams {
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
}
+ // PtNode without terminal id.
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
const int parentPos, const int codePointCount, const int *const codePoints,
const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
@@ -60,7 +61,8 @@ class PtNodeParams {
const int bigramPos, const int siblingPos)
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
mCodePointCount(codePointCount), mCodePoints(),
- mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL),
+ mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
@@ -68,6 +70,22 @@ class PtNodeParams {
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
}
+ // PtNode with a terminal id.
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int terminalIdFieldPos, const int terminalId, const int probability,
+ const int childrenPosFieldPos, const int childrenPos, const int bigramLinkedNodePos,
+ const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(terminalId),
+ mBigramPos(terminalId), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
AK_FORCE_INLINE bool isValid() const {
return mCodePointCount > 0;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
index b5abffeda..ef0067b48 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h
@@ -55,7 +55,7 @@ class DynamicPatriciaTrieNodeReader : public PtNodeReader {
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos, const int bigramLinkedNodePos) const;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
new file mode 100644
index 000000000..daaf08f61
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_PROBABILITY_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class ProbabilityDictContent : public SingleDictContent {
+ public:
+ ProbabilityDictContent(const char *const dictDirPath, const bool isUpdatable)
+ : SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
+ isUpdatable) {}
+
+ int getProbability(const int terminalId) const {
+ if (terminalId < 0 || terminalId >= getSize()) {
+ return NOT_A_PROBABILITY;
+ }
+ return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);
+
+ int getSize() const {
+ return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
+ + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE);
+ }
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
index e415881a9..4cb96da6a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -39,6 +39,15 @@ class SingleDictContent : public DictContent {
return mMmappedBuffer.get() != 0;
}
+ protected:
+ BufferWithExtendableBuffer *getWritableBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ const BufferWithExtendableBuffer *getBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 1164c408a..333a7c209 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -18,6 +18,7 @@
#define LATINIME_VER4_DICT_BUFFER_H
#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -42,10 +43,18 @@ class Ver4DictBuffers {
&& mShortcutDictContent.isValid();
}
- AK_FORCE_INLINE const uint8_t *getRawDictBuffer() const {
+ AK_FORCE_INLINE uint8_t *getRawDictBuffer() const {
return mDictBuffer.get()->getBuffer();
}
+ AK_FORCE_INLINE int getRawDictBufferSize() const {
+ return mDictBuffer.get()->getBufferSize();
+ }
+
+ AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
+ return &mProbabilityDictContent;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
@@ -54,8 +63,7 @@ class Ver4DictBuffers {
: mDictBuffer(dictBuffer),
mTerminalAddressTable(dictDirPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
- mProbabilityDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
- isUpdatable),
+ mProbabilityDictContent(dictDirPath, isUpdatable),
mBigramDictContent(dictDirPath,
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
@@ -67,7 +75,7 @@ class Ver4DictBuffers {
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
SingleDictContent mTerminalAddressTable;
- SingleDictContent mProbabilityDictContent;
+ ProbabilityDictContent mProbabilityDictContent;
SparseTableDictContent mBigramDictContent;
SparseTableDictContent mShortcutDictContent;
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 0bfd07b04..3e21399fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -30,6 +30,8 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
".shortcut_index_shortcut";
-const int Ver4DictConstants::NOT_A_TERMINAL = -1;
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 6498ce428..47a6990f8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -34,7 +34,9 @@ class Ver4DictConstants {
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
- static const int NOT_A_TERMINAL;
+ static const int NOT_A_TERMINAL_ID;
+ static const int PROBABILITY_SIZE;
+ static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..b18bdc943
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ const int headPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ dictBuf, &pos);
+ const int parentPos =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
+ int codePoints[MAX_WORD_LENGTH];
+ const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ int terminalIdFieldPos = NOT_A_DICT_POS;
+ int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ int probability = NOT_A_PROBABILITY;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ terminalIdFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ terminalIdFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
+ probability = mProbabilityDictContent->getProbability(terminalId);
+ }
+ int childrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ childrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
+ childrenPos += mBuffer->getOriginalBufferSize();
+ }
+ int newBigramLinkedNodePos = bigramLinkedNodePos;
+ if (siblingNodePos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ newBigramLinkedNodePos = childrenPos;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ // Sibling position is the tail position of original PtNode.
+ int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos;
+ // Read destination node if the read node is a moved node.
+ if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
+ // The destination position is stored at the same place as the parent position.
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
+ newBigramLinkedNodePos);
+ } else {
+ return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+ terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+ newBigramLinkedNodePos, newSiblingNodePos);
+ }
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
new file mode 100644
index 000000000..bdae0de7e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class ProbabilityDictContent;
+
+/*
+ * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
+ * node and reads node attributes including probability form probabilityBuffer.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const ProbabilityDictContent *const probabilityDictContent)
+ : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
+
+ ~Ver4PatriciaTrieNodeReader() {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const ProbabilityDictContent *const mProbabilityDictContent;
+
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos, const int bigramLinkedNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index b9ee4891c..33f738413 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -16,45 +16,110 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
namespace latinime {
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
- // TODO: Implement.
+ if (!dicNode->hasChildren()) {
+ return;
+ }
+ DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+ while (!readingHelper.isEnd()) {
+ const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
+ if (isTerminal && mHeaderPolicy.isDecayingDict()) {
+ // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
+ // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
+ // valid terminal DicNode.
+ isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
+ != NOT_A_PROBABILITY;
+ }
+ childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.hasChildren(),
+ ptNodeParams.isBlacklisted()
+ || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
+ ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
+ readingHelper.readNextSiblingNode(ptNodeParams);
+ }
}
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
- // TODO: Implement.
- return 0;
+ DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodePos(ptNodePos);
+ return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
+ maxCodePointCount, outCodePoints, outUnigramProbability);
}
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- // TODO: Implement.
- return NOT_A_DICT_POS;
+ DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
- // TODO: Implement.
- return NOT_A_PROBABILITY;
+ if (mHeaderPolicy.isDecayingDict()) {
+ // Both probabilities are encoded. Decode them and get probability.
+ return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+ } else {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ // bigramProbability is a bigram probability delta.
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+ }
}
int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
- // TODO: Implement.
- return NOT_A_PROBABILITY;
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_PROBABILITY;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
+ return NOT_A_PROBABILITY;
+ }
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
- // TODO: Implement.
- return NOT_A_DICT_POS;
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return ptNodeParams.getTerminalId();
}
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
- // TODO: Implement.
- return NOT_A_DICT_POS;
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return ptNodeParams.getTerminalId();
}
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index f7bfb3b0d..2f577f741 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -21,6 +21,7 @@
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@@ -33,7 +34,11 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
: mBuffers(buffers),
- mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4) {};
+ mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4),
+ mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
+ mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -91,6 +96,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy mHeaderPolicy;
+ BufferWithExtendableBuffer mDictBuffer;
+ Ver4PatriciaTrieNodeReader mNodeReader;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..fd8a3ba55
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+ const uint8_t *const buffer, int *pos) {
+ return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int Ver4PatriciaTrieReadingUtils::getProbability(
+ const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) {
+ int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE)
+ + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
+ return probabilityBuffer->readUintAndAdvancePosition(Ver4DictConstants::PROBABILITY_SIZE, &pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..19e6dd331
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class Ver4PatriciaTrieReadingUtils {
+ public:
+ static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer,
+ const int terminalId);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 5032131ab..e028de526 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -23,6 +23,20 @@ const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 9
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
+uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
+ int *const pos) const {
+ const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
+ if (readingPosIsInAdditionalBuffer) {
+ *pos -= mOriginalBufferSize;
+ }
+ const int value = ByteArrayUtils::readUintAndAdvancePosition(
+ getBuffer(readingPosIsInAdditionalBuffer), size, pos);
+ if (readingPosIsInAdditionalBuffer) {
+ *pos += mOriginalBufferSize;
+ }
+ return value;
+}
+
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 1e27a1bec..2d89f71b1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -71,6 +71,8 @@ class BufferWithExtendableBuffer {
}
}
+ uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
+
AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 0c1576818..1ca01b868 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -114,6 +114,24 @@ class ByteArrayUtils {
return buffer[(*pos)++];
}
+ static AK_FORCE_INLINE int readUintAndAdvancePosition(const uint8_t *const buffer,
+ const int size, int *const pos) {
+ // size must be in 1 to 4.
+ ASSERT(size >= 1 && size <= 4);
+ switch (size) {
+ case 1:
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ case 2:
+ return ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
+ case 3:
+ return ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
+ case 4:
+ return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+ default:
+ return 0;
+ }
+ }
+
/**
* Code Point Reading
*