aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/Android.mk3
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/byte_array_utils.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/binary_format.h42
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp41
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp140
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp67
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h139
11 files changed, 416 insertions, 113 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 771623c36..acd230ff2 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -72,7 +72,8 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
dynamic_patricia_trie_policy.cpp \
- patricia_trie_policy.cpp) \
+ patricia_trie_policy.cpp \
+ patricia_trie_reading_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
index 52b668936..20b77b3b2 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
@@ -44,15 +44,15 @@ const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
const int origin = *pos;
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
- offset = ByteArrayUtils::readUint8andAdvancePosition(
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos);
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
- offset = ByteArrayUtils::readUint16andAdvancePosition(
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos);
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
- offset = ByteArrayUtils::readUint24andAdvancePosition(
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos);
break;
}
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
index 15637d8a9..375fc7dff 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
@@ -33,7 +33,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- return ByteArrayUtils::readUint8andAdvancePosition(
+ return ByteArrayUtils::readUint8AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos);
}
@@ -66,7 +66,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
// readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
- return ByteArrayUtils::readUint16andAdvancePosition(
+ return ByteArrayUtils::readUint16AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
}
diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/core/dictionary/byte_array_utils.h
index daa822ffa..75ccfc766 100644
--- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h
+++ b/native/jni/src/suggest/core/dictionary/byte_array_utils.h
@@ -50,39 +50,39 @@ class ByteArrayUtils {
return buffer[pos];
}
- static AK_FORCE_INLINE uint32_t readUint32andAdvancePosition(
+ static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint32_t value = readUint32(buffer, *pos);
*pos += 4;
return value;
}
- static AK_FORCE_INLINE int readSint24andAdvancePosition(
+ static AK_FORCE_INLINE int readSint24AndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint8_t value = readUint8(buffer, *pos);
if (value < 0x80) {
- return readUint24andAdvancePosition(buffer, pos);
+ return readUint24AndAdvancePosition(buffer, pos);
} else {
(*pos)++;
- return -(((value & 0x7F) << 16) ^ readUint16andAdvancePosition(buffer, pos));
+ return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
}
}
- static AK_FORCE_INLINE uint32_t readUint24andAdvancePosition(
+ static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint32_t value = readUint24(buffer, *pos);
*pos += 3;
return value;
}
- static AK_FORCE_INLINE uint16_t readUint16andAdvancePosition(
+ static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint16_t value = readUint16(buffer, *pos);
*pos += 2;
return value;
}
- static AK_FORCE_INLINE uint8_t readUint8andAdvancePosition(
+ static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
return buffer[(*pos)++];
}
@@ -113,7 +113,7 @@ class ByteArrayUtils {
*pos += 1;
return NOT_A_CODE_POINT;
} else {
- return readUint24andAdvancePosition(buffer, pos);
+ return readUint24AndAdvancePosition(buffer, pos);
}
} else {
*pos += 1;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/binary_format.h b/native/jni/src/suggest/policyimpl/dictionary/binary_format.h
index 9e22b50cd..23f4c7fec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/binary_format.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/binary_format.h
@@ -56,7 +56,6 @@ class BinaryFormat {
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
- static bool hasBlacklistedOrNotAWordFlag(const int flags);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
@@ -74,10 +73,6 @@ class BinaryFormat {
static int getCodePointsAndProbabilityAndReturnCodePointCount(
const uint8_t *const root, const int nodePos, const int maxCodePointCount,
int *const outCodePoints, int *const outUnigramProbability);
- static int getBigramListPositionForWordPosition(const uint8_t *const root,
- const int nodePosition);
- static int getShortcutListPositionForWordPosition(const uint8_t *const root,
- const int nodePosition);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
@@ -99,10 +94,6 @@ class BinaryFormat {
static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
};
-inline bool BinaryFormat::hasBlacklistedOrNotAWordFlag(const int flags) {
- return (flags & (FLAG_IS_BLACKLISTED | FLAG_IS_NOT_A_WORD)) != 0;
-}
-
AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
int *pos) {
const int msb = dict[(*pos)++];
@@ -475,38 +466,5 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointC
return 0;
}
-AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
- const uint8_t *const root, const int nodePosition) {
- if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
- int position = nodePosition;
- const uint8_t flags = getFlagsAndForwardPointer(root, &position);
- if (!(flags & FLAG_HAS_BIGRAMS)) return NOT_A_DICT_POS;
- if (flags & FLAG_HAS_MULTIPLE_CHARS) {
- position = skipOtherCharacters(root, position);
- } else {
- getCodePointAndForwardPointer(root, &position);
- }
- position = skipProbability(flags, position);
- position = skipChildrenPosition(flags, position);
- position = skipShortcuts(root, flags, position);
- return position;
-}
-
-AK_FORCE_INLINE int BinaryFormat::getShortcutListPositionForWordPosition(
- const uint8_t *const root, const int nodePosition) {
- if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
- int position = nodePosition;
- const uint8_t flags = getFlagsAndForwardPointer(root, &position);
- if (!(flags & FLAG_HAS_SHORTCUT_TARGETS)) return NOT_A_DICT_POS;
- if (flags & FLAG_HAS_MULTIPLE_CHARS) {
- position = skipOtherCharacters(root, position);
- } else {
- getCodePointAndForwardPointer(root, &position);
- }
- position = skipProbability(flags, position);
- position = skipChildrenPosition(flags, position);
- return position;
-}
-
} // namespace latinime
#endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..0de6341b0
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+
+#include "defines.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+typedef DynamicPatriciaTrieReadingUtils DptReadingUtils;
+
+const DptReadingUtils::NodeFlags DptReadingUtils::MASK_MOVED = 0xC0;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+
+/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
+ const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
+ if ((flags & MASK_MOVED) == FLAG_IS_NOT_MOVED) {
+ const int base = *pos;
+ return base + ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ } else {
+ return NOT_A_DICT_POS;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..f44c2651a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+class DynamicPatriciaTrieReadingUtils {
+ public:
+ typedef uint8_t NodeFlags;
+
+ static AK_FORCE_INLINE int getForwardLinkPosition(const uint8_t *const buffer, const int pos) {
+ int linkAddressPos = pos;
+ return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
+ }
+
+ static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
+ return forwardLinkAddress != 0;
+ }
+
+ static AK_FORCE_INLINE int getParentPosAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ const int base = *pos;
+ return base + ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ }
+
+ static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
+ const NodeFlags flags, int *const pos);
+
+ /**
+ * Node Flags
+ */
+ static AK_FORCE_INLINE bool isMoved(const NodeFlags flags) {
+ return FLAG_IS_MOVED == (MASK_MOVED & flags);
+ }
+
+ static AK_FORCE_INLINE bool isDeleted(const NodeFlags flags) {
+ return FLAG_IS_DELETED == (MASK_MOVED & flags);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils);
+
+ static const NodeFlags MASK_MOVED;
+ static const NodeFlags FLAG_IS_NOT_MOVED;
+ static const NodeFlags FLAG_IS_MOVED;
+ static const NodeFlags FLAG_IS_DELETED;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index 2a9a5ce7a..097f7c86a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -21,7 +21,9 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/policyimpl/dictionary/binary_format.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime {
@@ -34,7 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
return;
}
int nextPos = dicNode->getChildrenPos();
- const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
+ const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) {
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
@@ -60,82 +62,108 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(
int PatriciaTriePolicy::getUnigramProbability(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
- const uint8_t *const root = binaryDictionaryInfo->getDictRoot();
+ if (nodePos == NOT_A_VALID_WORD_POS) {
+ return NOT_A_PROBABILITY;
+ }
+ const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
+ return NOT_A_PROBABILITY;
+ }
+ if (PatriciaTrieReadingUtils::isNotAWord(flags)
+ || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
// If this is not a word, or if it's a blacklisted entry, it should behave as
// having no probability outside of the suggestion process (where it should be used
// for shortcuts).
return NOT_A_PROBABILITY;
}
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- if (hasMultipleChars) {
- pos = BinaryFormat::skipOtherCharacters(root, pos);
- } else {
- BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- }
- return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
+ PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
+ return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
}
int PatriciaTriePolicy::getShortcutPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
- return BinaryFormat::getShortcutListPositionForWordPosition(
- binaryDictionaryInfo->getDictRoot(), nodePos);
+ if (nodePos == NOT_A_VALID_WORD_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
+ int pos = nodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ return NOT_A_DICT_POS;
+ }
+ PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
+ }
+ return pos;
}
int PatriciaTriePolicy::getBigramsPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
- return BinaryFormat::getBigramListPositionForWordPosition(
- binaryDictionaryInfo->getDictRoot(), nodePos);
+ if (nodePos == NOT_A_VALID_WORD_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
+ int pos = nodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
+ if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ return NOT_A_DICT_POS;
+ }
+ PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
+ }
+ return pos;
}
-int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
+int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
+ const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
- const int nextPos = pos;
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
- binaryDictionaryInfo->getDictRoot(), &pos);
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
- const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
- const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
-
- int codePoint = BinaryFormat::getCodePointAndForwardPointer(
- binaryDictionaryInfo->getDictRoot(), &pos);
- ASSERT(NOT_A_CODE_POINT != codePoint);
- // TODO: optimize this
+ const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
+ int pos = nodePos;
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH];
- uint16_t mergedNodeCodePointCount = 0;
- mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
-
- do {
- const int nextCodePoint = hasMultipleChars
- ? BinaryFormat::getCodePointAndForwardPointer(
- binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
- const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
- if (!isLastChar) {
- mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
- }
- codePoint = nextCodePoint;
- } while (NOT_A_CODE_POINT != codePoint);
-
- const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
- binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
- pos = BinaryFormat::skipProbability(flags, pos);
- int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
- binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
- const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
- binaryDictionaryInfo->getDictRoot(), flags, pos);
-
- if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
- return siblingPos;
+ const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
+ const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
+ PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos)
+ : NOT_A_PROBABILITY;
+ const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictRoot, flags, &pos) : NOT_A_DICT_POS;
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
+ BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
+ BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
+ binaryDictionaryInfo, &pos);
+ }
+ if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
+ childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
+ PatriciaTrieReadingUtils::isTerminal(flags),
+ PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags) ||
+ PatriciaTrieReadingUtils::isNotAWord(flags),
+ mergedNodeCodePointCount, mergedNodeCodePoints);
}
- childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
- hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
- return siblingPos;
+ return pos;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 42827d93a..71f256eee 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -61,7 +61,7 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
PatriciaTriePolicy() {}
~PatriciaTriePolicy() {}
- int createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
+ int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..89e981df8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+
+#include "defines.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+typedef PatriciaTrieReadingUtils PtReadingUtils;
+
+const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+
+// Flag for single/multiple char group
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
+// Flag for terminal groups
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
+// Flag for shortcut targets presence
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
+// Flag for bigram presence
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
+// Flag for non-words (typically, shortcut only entries)
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
+// Flag for blacklist
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
+
+/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
+ const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
+ const int base = *pos;
+ int offset = 0;
+ switch (MASK_GROUP_ADDRESS_TYPE & flags) {
+ case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ break;
+ case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
+ break;
+ case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
+ break;
+ default:
+ // If we come here, it means we asked for the children of a word with
+ // no children.
+ return NOT_A_DICT_POS;
+ }
+ return base + offset;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
new file mode 100644
index 000000000..002c3f19b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/dictionary/byte_array_utils.h"
+
+namespace latinime {
+
+class PatriciaTrieReadingUtils {
+ public:
+ typedef uint8_t NodeFlags;
+
+ static AK_FORCE_INLINE int getGroupCountAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ if (firstByte < 0x80) {
+ return firstByte;
+ } else {
+ return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer, pos);
+ }
+ }
+
+ static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ }
+
+ static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
+ }
+
+ // Returns the number of read characters.
+ static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer,
+ const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
+ int length = 0;
+ if (hasMultipleChars(flags)) {
+ length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
+ pos);
+ } else {
+ if (maxLength > 0) {
+ outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos);
+ length = 1;
+ }
+ }
+ return length;
+ }
+
+ // Returns the number of skipped characters.
+ static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const pos) {
+ if (hasMultipleChars(flags)) {
+ return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
+ } else {
+ if (maxLength > 0) {
+ getCodePointAndAdvancePosition(buffer, pos);
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ }
+
+ static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
+ const NodeFlags flags, int *const pos);
+
+ /**
+ * Node Flags
+ */
+ static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
+ return (flags & FLAG_IS_BLACKLISTED) != 0;
+ }
+
+ static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
+ return (flags & FLAG_IS_NOT_A_WORD) != 0;
+ }
+
+ static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
+ return (flags & FLAG_IS_TERMINAL) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
+ return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
+ return (flags & FLAG_HAS_BIGRAMS) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
+ return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
+ }
+
+ static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
+ return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
+
+ static const NodeFlags MASK_GROUP_ADDRESS_TYPE;
+ static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
+ static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
+ static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
+ static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
+
+ static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
+ static const NodeFlags FLAG_IS_TERMINAL;
+ static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
+ static const NodeFlags FLAG_HAS_BIGRAMS;
+ static const NodeFlags FLAG_IS_NOT_A_WORD;
+ static const NodeFlags FLAG_IS_BLACKLISTED;
+};
+} // namespace latinime
+#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */