aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/NativeFileList.mk3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp85
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp30
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h50
8 files changed, 181 insertions, 69 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index e9efde9fd..c6430a1d5 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -57,7 +57,8 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_pt_writing_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \
- patricia_trie_reading_utils.cpp) \
+ patricia_trie_reading_utils.cpp \
+ ver2_patricia_trie_node_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
ver4_dict_buffers.cpp \
ver4_dict_constants.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index a4a53a80c..faaf44162 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -53,6 +53,21 @@ class PtNodeParams {
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
}
+ // PtNode read from version 2 dictionary.
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int codePointCount, const int *const codePoints, const int probability,
+ const int childrenPos, const int shortcutPos, const int bigramPos,
+ const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS),
+ mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
+ mBigramPos(bigramPos), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
// PtNode with a terminal id.
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
const int parentPos, const int codePointCount, const int *const codePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 960c1b936..3752241bf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -336,99 +336,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
- return NOT_A_PROBABILITY;
- }
- if (PatriciaTrieReadingUtils::isNotAWord(flags)
- || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
+ const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
// If this is not a word, or if it's a blacklisted entry, it should behave as
// having no probability outside of the suggestion process (where it should be used
// for shortcuts).
return NOT_A_PROBABILITY;
}
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
- mDictRoot, &pos), NOT_A_PROBABILITY);
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- return NOT_A_DICT_POS;
- }
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
- }
- return pos;
+ return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
- return NOT_A_DICT_POS;
- }
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- mShortcutListPolicy.skipAllShortcuts(&pos);;
- }
- return pos;
+ return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos();
}
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const int ptNodePos, DicNodeVector *childDicNodes) const {
- int pos = ptNodePos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ PatriciaTrieReadingUtils::NodeFlags flags;
+ int mergedNodeCodePointCount = 0;
int mergedNodeCodePoints[MAX_WORD_LENGTH];
- const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
- const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
- : NOT_A_PROBABILITY;
- const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- mDictRoot, flags, &pos) : NOT_A_DICT_POS;
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
- }
- if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- getBigramsStructurePolicy()->skipAllBigrams(&pos);
- }
- if (mergedNodeCodePointCount <= 0) {
- AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
- ASSERT(false);
- return pos;
- }
+ int probability = NOT_A_PROBABILITY;
+ int childrenPos = NOT_A_DICT_POS;
+ int shortcutPos = NOT_A_DICT_POS;
+ int bigramPos = NOT_A_DICT_POS;
+ int siblingPos = NOT_A_DICT_POS;
+ PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
+ getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
+ &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
PatriciaTrieReadingUtils::isTerminal(flags),
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
- PatriciaTrieReadingUtils::isBlacklisted(flags) ||
- PatriciaTrieReadingUtils::isNotAWord(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags)
+ || PatriciaTrieReadingUtils::isNotAWord(flags),
mergedNodeCodePointCount, mergedNodeCodePoints);
- return pos;
+ return siblingPos;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 319c81569..12efb44d8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@@ -40,7 +41,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
- mHeaderPolicy.getSize()),
- mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
+ mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
+ mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -143,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const int mDictBufferSize;
const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy;
+ const Ver2ParticiaTrieNodeReader mPtNodeReader;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
index 82b3593c8..b4eee5572 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
@@ -17,6 +17,8 @@
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
return base + offset;
}
+/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+ int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+ int *const outBigramPos, int *const outSiblingPos) {
+ int readingPos = ptNodePos;
+ const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
+ *outFlags = flags;
+ *outCodePointCount = getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
+ *outProbability = isTerminal(flags) ?
+ readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
+ *outChildrenPos = hasChildrenInFlags(flags) ?
+ readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS;
+ *outShortcutPos = NOT_A_DICT_POS;
+ if (hasShortcutTargets(flags)) {
+ *outShortcutPos = readingPos;
+ shortcutPolicy->skipAllShortcuts(&readingPos);
+ }
+ *outBigramPos = NOT_A_DICT_POS;
+ if (hasBigrams(flags)) {
+ *outBigramPos = readingPos;
+ bigramPolicy->skipAllBigrams(&readingPos);
+ }
+ *outSiblingPos = readingPos;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
index b28f58336..fa1430ce6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
@@ -23,6 +23,9 @@
namespace latinime {
+class DictionaryShortcutsStructurePolicy;
+class DictionaryBigramsStructurePolicy;
+
// TODO: Move to pt_common
class PatriciaTrieReadingUtils {
public:
@@ -101,6 +104,13 @@ class PatriciaTrieReadingUtils {
return nodeFlags;
}
+ static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
+ int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
+ int *const outBigramPos, int *const outSiblingPos);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..778d7a408
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+const PtNodeParams Ver2ParticiaTrieNodeReader::fetchNodeInfoInBufferFromPtNodePos(
+ const int ptNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mDictSize) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mDictSize);
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ PatriciaTrieReadingUtils::NodeFlags flags;
+ int mergedNodeCodePointCount = 0;
+ int mergedNodeCodePoints[MAX_WORD_LENGTH];
+ int probability = NOT_A_PROBABILITY;
+ int childrenPos = NOT_A_DICT_POS;
+ int shortcutPos = NOT_A_DICT_POS;
+ int bigramPos = NOT_A_DICT_POS;
+ int siblingPos = NOT_A_DICT_POS;
+ PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy,
+ mBigramPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability,
+ &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
+ if (mergedNodeCodePointCount <= 0) {
+ AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ return PtNodeParams(ptNodePos, flags, mergedNodeCodePointCount, mergedNodeCodePoints,
+ probability, childrenPos, shortcutPos, bigramPos, siblingPos);
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
new file mode 100644
index 000000000..dd1a0da51
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+class Ver2ParticiaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize,
+ const DictionaryBigramsStructurePolicy *const bigramPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+ : mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy),
+ mShortuctPolicy(shortcutPolicy) {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader);
+
+ const uint8_t *const mDictBuffer;
+ const int mDictSize;
+ const DictionaryBigramsStructurePolicy *const mBigramPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */