about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/NativeFileList.mk 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp 87
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp 54
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h 44
5 files changed, 108 insertions, 85 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index c6430a1d5..eb24df601 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp \
- ver2_patricia_trie_node_reader.cpp) \
+ ver2_patricia_trie_node_reader.cpp \
+ ver2_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
ver4_dict_buffers.cpp \
ver4_dict_constants.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 3752241bf..8172e70b6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
- int pos = getRootPosition();
- int wordPos = 0;
-
- while (true) {
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- if (wordPos >= length) return NOT_A_DICT_POS;
- int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
- &pos);
- const int wChar = forceLowerCaseSearch
- ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
- while (true) {
- // If there are no more PtNodes in this array, it means we could not
- // find a matching character for this depth, therefore there is no match.
- if (0 >= ptNodeCount) return NOT_A_DICT_POS;
- const int ptNodePos = pos;
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
- int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
- &pos);
- if (character == wChar) {
- // This is the correct PtNode. Only one PtNode may start with the same char within
- // a PtNode array, so either we found our match in this array, or there is
- // no match and we can return NOT_A_DICT_POS. So we will check all the
- // characters in this PtNode indeed does match.
- if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
- character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
- &pos);
- while (NOT_A_CODE_POINT != character) {
- ++wordPos;
- // If we shoot the length of the word we search for, or if we find a single
- // character that does not match, as explained above, it means the word is
- // not in the dictionary (by virtue of this PtNode being the only one to
- // match the word on the first character, but not matching the whole word).
- if (wordPos >= length) return NOT_A_DICT_POS;
- if (inWord[wordPos] != character) return NOT_A_DICT_POS;
- character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mDictRoot, &pos);
- }
- }
- // If we come here we know that so far, we do match. Either we are on a terminal
- // and we match the length, in which case we found it, or we traverse children.
- // If we don't match the length AND don't have children, then a word in the
- // dictionary fully matches a prefix of the searched word but not the full word.
- ++wordPos;
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- if (wordPos == length) {
- return ptNodePos;
- }
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- return NOT_A_DICT_POS;
- }
- // We have children and we are still shorter than the word we are searching for, so
- // we need to traverse children. Put the pointer on the children position, and
- // break
- pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
- flags, &pos);
- break;
- } else {
- // This PtNode does not match, so skip the remaining part and go to the next.
- if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
- PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
- &pos);
- }
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
- }
- if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
- flags, &pos);
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
- mShortcutListPolicy.skipAllShortcuts(&pos);
- }
- if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
- mBigramListPolicy.skipAllBigrams(&pos);
- }
- }
- --ptNodeCount;
- }
- }
+ DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
int PatriciaTriePolicy::getProbability(const int unigramProbability,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 12efb44d8..1ce7f85d4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -25,6 +25,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
- mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
- mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
+ mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
+ mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy;
const Ver2ParticiaTrieNodeReader mPtNodeReader;
+ const Ver2PtNodeArrayReader mPtNodeArrayReader;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
new file mode 100644
index 000000000..125ea31dc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const {
+ if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) {
+ // Reading invalid position because of a bug or a broken dictionary.
+ AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+ ptNodeArrayPos, mDictSize);
+ ASSERT(false);
+ return false;
+ }
+ int readingPos = ptNodeArrayPos;
+ const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictBuffer, &readingPos);
+ *outPtNodeCount = ptNodeCountInArray;
+ *outFirstPtNodePos = readingPos;
+ return true;
+}
+
+bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+ int *const outNextPtNodeArrayPos) const {
+ if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) {
+ // Reading invalid position because of a bug or a broken dictionary.
+ AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+ forwordLinkPos, mDictSize);
+ ASSERT(false);
+ return false;
+ }
+ // Ver2 dicts don't have forward links.
+ *outNextPtNodeArrayPos = NOT_A_DICT_POS;
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
new file mode 100644
index 000000000..77404adf8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
+#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
+
+namespace latinime {
+
+class Ver2PtNodeArrayReader : public PtNodeArrayReader {
+ public:
+ Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
+ : mDictBuffer(dictBuffer), mDictSize(dictSize) {};
+
+ virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
+ int *const outPtNodeCount, int *const outFirstPtNodePos) const;
+ virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
+ int *const outNextPtNodeArrayPos) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);
+
+ const uint8_t *const mDictBuffer;
+ const int mDictSize;
+};
+} // namespace latinime
+#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */