aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp3
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h31
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_format.h39
-rw-r--r--native/jni/src/suggest/core/dictionary/shortcut_utils.h12
-rw-r--r--native/jni/src/suggest/core/dictionary/terminal_attributes.h67
5 files changed, 77 insertions, 75 deletions
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
index 0a7509c8b..52b668936 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
@@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
// Mask for attribute probability, stored on 4 bits inside the flags byte.
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
+// The numeric value of the shortcut probability that means 'whitelist'.
+const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
/* static */ int TaUtils::getBigramAddressAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
index f38fd5aaa..15637d8a9 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
@@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
public:
typedef uint8_t TerminalAttributeFlags;
typedef TerminalAttributeFlags BigramFlags;
+ typedef TerminalAttributeFlags ShortcutFlags;
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
@@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
int *const pos);
+ // Shortcuts reading methods
+ // Reads the uint16 shortcut list size field at *pos and returns the size of the shortcut list
+ // region that follows it, i.e. the stored size minus the size of the size field itself.
+ static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+ // The stored uint16 value *includes* the size field itself, hence the subtraction.
+ return ByteArrayUtils::readUint16andAdvancePosition(
+ binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
+ // Moves *pos past the whole shortcut list: reading the size field forwards *pos, then the
+ // returned list size (which excludes the size field) is added on top.
+ static AK_FORCE_INLINE void skipShortcuts(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+ const int shortcutListSize = getShortcutListSizeAndForwardPointer(
+ binaryDictionaryInfo, pos);
+ *pos += shortcutListSize;
+ }
+
+ // Returns true when the probability extracted from the shortcut flags equals
+ // WHITELIST_SHORTCUT_PROBABILITY, the value that marks a shortcut as 'whitelist'.
+ static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
+ return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
+ }
+
+ // Reads a shortcut target from the dictionary at *pos into outWord, passing maxLength through
+ // to ByteArrayUtils::readStringAndAdvancePosition(), and returns that call's result.
+ // NOTE(review): ShortcutIterator stores the result as the target length - confirm the helper
+ // returns the number of code points read.
+ static AK_FORCE_INLINE int readShortcutTarget(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
+ int *const outWord, int *const pos) {
+ return ByteArrayUtils::readStringAndAdvancePosition(
+ binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
@@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT;
+ static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ static const int WHITELIST_SHORTCUT_PROBABILITY;
static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h
index df0ec480d..9557d8ce7 100644
--- a/native/jni/src/suggest/core/dictionary/binary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_format.h
@@ -52,14 +52,10 @@ class BinaryFormat {
// Mask for attribute probability, stored on 4 bits inside the flags byte.
static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
- // The numeric value of the shortcut probability that means 'whitelist'.
- static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
- static const int SHORTCUT_LIST_SIZE_SIZE = 2;
-
static bool hasBlacklistedOrNotAWordFlag(const int flags);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
@@ -73,9 +69,6 @@ class BinaryFormat {
const int pos);
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
static bool hasChildrenInFlags(const uint8_t flags);
- static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
- int *pos);
- static int getAttributeProbabilityFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
}
-AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
- const uint8_t flags, int *pos) {
- int offset = 0;
- const int origin = *pos;
- switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
- offset = dict[origin];
- *pos = origin + 1;
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
- offset = dict[origin] << 8;
- offset += dict[origin + 1];
- *pos = origin + 2;
- break;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
- offset = dict[origin] << 16;
- offset += dict[origin + 1] << 8;
- offset += dict[origin + 2];
- *pos = origin + 3;
- break;
- }
- if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
- return origin - offset;
- } else {
- return origin + offset;
- }
-}
-
-inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
- return flags & MASK_ATTRIBUTE_PROBABILITY;
-}
-
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 601ac5f5a..3c2180937 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -29,15 +29,15 @@ class ShortcutUtils {
int outputWordIndex, const int finalScore, int *const outputCodePoints,
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
+ int shortcutTarget[MAX_WORD_LENGTH];
while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
- int shortcutTarget[MAX_WORD_LENGTH];
- int shortcutProbability;
- const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
- MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
+ bool isWhilelist;
+ int shortcutTargetStringLength;
+ iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+ &shortcutTargetStringLength, &isWhilelist);
int shortcutScore;
int kind;
- if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
- && sameAsTyped) {
+ if (isWhilelist && sameAsTyped) {
shortcutScore = S_INT_MAX;
kind = Dictionary::KIND_WHITELIST;
} else {
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
index bbd9af090..cec47081e 100644
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
@@ -20,6 +20,7 @@
#include <stdint.h>
#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/dictionary/binary_format.h"
namespace latinime {
@@ -33,60 +34,66 @@ class TerminalAttributes {
public:
class ShortcutIterator {
public:
- ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos,
- const uint8_t flags)
- : mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos),
- mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) {
- }
+ // Starts iterating at shortcutPos; hasShortcutList is the initial value reported by
+ // hasNextShortcutTarget().
+ ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const int shortcutPos, const bool hasShortcutList)
+ : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
+ mHasNextShortcutTarget(hasShortcutList) {}
inline bool hasNextShortcutTarget() const {
return mHasNextShortcutTarget;
}
- // Gets the shortcut target itself as an int string. For parameters and return value
- // see BinaryFormat::getWordAtAddress.
- inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
- const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(
- mBinaryDicitionaryInfo->getDictRoot(), &mPos);
- mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
- unsigned int i;
- for (i = 0; i < MAX_WORD_LENGTH; ++i) {
- const int codePoint = BinaryFormat::getCodePointAndForwardPointer(
- mBinaryDicitionaryInfo->getDictRoot(), &mPos);
- if (NOT_A_CODE_POINT == codePoint) break;
- outWord[i] = codePoint;
+ // Reads the flags of the next shortcut and updates hasNextShortcutTarget() from them. When
+ // outIsWhitelist is non-null it receives whether the shortcut is a whitelist entry; when
+ // outTargetLength is non-null the target is read into outTarget and its length stored there.
+ // NOTE(review): the target is only read (and mPos only moved past it) when outTargetLength
+ // is non-null, so passing null looks like it would leave mPos inside the entry - confirm.
+ AK_FORCE_INLINE void nextShortcutTarget(
+ const int maxDepth, int *const outTarget, int *const outTargetLength,
+ bool *const outIsWhitelist) {
+ const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
+ BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
+ mBinaryDictionaryInfo, &mPos);
+ mHasNextShortcutTarget =
+ BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
+ if (outIsWhitelist) {
+ *outIsWhitelist =
+ BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
+ }
+ if (outTargetLength) {
+ *outTargetLength =
+ BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
+ mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
}
- *outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
- return i;
}
private:
- const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
+ const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int mPos;
bool mHasNextShortcutTarget;
};
- TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
- const uint8_t flags, const int pos)
- : mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) {
- }
+ // Stores the dictionary info, the node flags and shortcutPos (kept as the position of the
+ // shortcut list size field); nothing is read from the dictionary here.
+ TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+ const uint8_t nodeFlags, const int shortcutPos)
+ : mBinaryDictionaryInfo(binaryDictionaryInfo),
+ mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
inline ShortcutIterator getShortcutIterator() const {
// A size field precedes the shortcuts so that the whole shortcut chunk can be skipped quickly.
// The iterator does not need the size; it is read below only to move past that field.
- return ShortcutIterator(
- mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
+ int shortcutPos = mShortcutListSizePos;
+ BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
+ mBinaryDictionaryInfo, &shortcutPos);
+ const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
+ return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
}
+ // Returns what BinaryFormat::hasBlacklistedOrNotAWordFlag() reports for this node's flags.
bool isBlacklistedOrNotAWord() const {
- return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
+ return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
- const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
- const uint8_t mFlags;
- const int mStartPos;
+ const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+ const uint8_t mNodeFlags;
+ const int mShortcutListSizePos;
};
} // namespace latinime
#endif // LATINIME_TERMINAL_ATTRIBUTES_H