aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h54
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_properties.h32
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp6
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h13
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp43
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h8
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp28
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h49
-rw-r--r--native/jni/src/suggest/core/dictionary/terminal_attributes.h13
-rw-r--r--native/jni/src/suggest/core/suggest.cpp33
-rw-r--r--native/jni/src/suggest/core/suggest.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h3
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h7
13 files changed, 138 insertions, 153 deletions
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 017df34fd..973da67e4 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -112,32 +112,23 @@ class DicNode {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
+ NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
- false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
- 0 /* terminalDepth */);
+ false /* isTerminal */, true /* hasChildren */,
+ false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
mDicNodeState.init(prevWordNodePos);
PROF_NODE_RESET(mProfiler);
}
- void initAsPassingChild(DicNode *parentNode) {
- mIsUsed = true;
- mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
- const int c = parentNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
- mDicNodeState.init(&parentNode->mDicNodeState);
- PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
- }
-
// Init for root with previous word
void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
+ NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
- false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
- 0 /* terminalDepth */);
+ false /* isTerminal */, true /* hasChildren */,
+ false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
// TODO: Move to dicNodeState?
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
mDicNodeState.mDicNodeStateInput.init(
@@ -157,18 +148,27 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // TODO: minimize arguments by looking binary_format
- void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
+ void initAsPassingChild(DicNode *parentNode) {
+ mIsUsed = true;
+ mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
+ const int c = parentNode->getNodeTypedCodePoint();
+ mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
+ mDicNodeState.init(&parentNode->mDicNodeState);
+ PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+ }
+
+ void initAsChild(DicNode *dicNode, const int pos, const int childrenPos,
const int attributesPos, const int probability, const bool isTerminal,
- const bool hasChildren, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0],
- probability, isTerminal, hasChildren, newDepth, newLeavingDepth);
+ mDicNodeProperties.init(pos, childrenPos, attributesPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -216,7 +216,7 @@ class DicNode {
}
bool isImpossibleBigramWord() const {
- if (mDicNodeProperties.hasBlacklistedOrNotAWordFlag()) {
+ if (isBlacklistedOrNotAWord()) {
return true;
}
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
@@ -463,8 +463,8 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.isExactMatch();
}
- uint8_t getFlags() const {
- return mDicNodeProperties.getFlags();
+ bool isBlacklistedOrNotAWord() const {
+ return mDicNodeProperties.isBlacklistedOrNotAWord();
}
int getAttributesPos() const {
@@ -504,6 +504,12 @@ class DicNode {
if (!right->isUsed()) {
return false;
}
+ // Promote exact matches to prevent them from being pruned.
+ const bool leftExactMatch = isExactMatch();
+ const bool rightExactMatch = right->isExactMatch();
+ if (leftExactMatch != rightExactMatch) {
+ return leftExactMatch;
+ }
const float diff =
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
static const float MIN_DIFF = 0.000001f;
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/dic_node_properties.h
index 7e8aa4979..d98000d83 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_properties.h
@@ -20,7 +20,6 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/binary_format.h"
namespace latinime {
@@ -32,24 +31,25 @@ namespace latinime {
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
- mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false),
- mHasChildren(false) {}
+ : mPos(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
+ mNodeCodePoint(0), mIsTerminal(false), mHasChildren(false),
+ mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
// Should be called only once per DicNode initialization.
- void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
+ void init(const int pos, const int childrenPos, const int attributesPos,
const int nodeCodePoint, const int probability, const bool isTerminal,
- const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) {
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t depth, const uint16_t leavingDepth) {
mPos = pos;
- mFlags = flags;
mChildrenPos = childrenPos;
mAttributesPos = attributesPos;
mNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
mHasChildren = hasChildren;
+ mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
@@ -57,13 +57,13 @@ class DicNodeProperties {
// Init for copy
void init(const DicNodeProperties *const nodeProp) {
mPos = nodeProp->mPos;
- mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
mNodeCodePoint = nodeProp->mNodeCodePoint;
mProbability = nodeProp->mProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasChildren = nodeProp->mHasChildren;
+ mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
mDepth = nodeProp->mDepth;
mLeavingDepth = nodeProp->mLeavingDepth;
}
@@ -71,13 +71,13 @@ class DicNodeProperties {
// Init as passing child
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
mPos = nodeProp->mPos;
- mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mProbability = nodeProp->mProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasChildren = nodeProp->mHasChildren;
+ mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = nodeProp->mLeavingDepth;
}
@@ -86,10 +86,6 @@ class DicNodeProperties {
return mPos;
}
- uint8_t getFlags() const {
- return mFlags;
- }
-
int getChildrenPos() const {
return mChildrenPos;
}
@@ -123,8 +119,8 @@ class DicNodeProperties {
return mHasChildren || mDepth != mLeavingDepth;
}
- bool hasBlacklistedOrNotAWordFlag() const {
- return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
+ bool isBlacklistedOrNotAWord() const {
+ return mIsBlacklistedOrNotAWord;
}
private:
@@ -132,15 +128,15 @@ class DicNodeProperties {
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
int mPos;
- uint8_t mFlags;
int mChildrenPos;
int mAttributesPos;
int mProbability;
int mNodeCodePoint;
- uint16_t mDepth;
- uint16_t mLeavingDepth;
bool mIsTerminal;
bool mHasChildren;
+ bool mIsBlacklistedOrNotAWord;
+ uint16_t mDepth;
+ uint16_t mLeavingDepth;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index c7c8d2a19..6c7f6667a 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -78,6 +78,7 @@ namespace latinime {
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
const bool hasShortcuts = (0 != (BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS & flags));
+ const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &pos);
@@ -111,8 +112,9 @@ namespace latinime {
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
return siblingPos;
}
- childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
- probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
+ childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, attributesPos,
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord,
+ mergedNodeCodePointCount, mergedNodeCodePoints);
return siblingPos;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 9641cc19c..5ac4eeaf4 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -62,14 +62,15 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
- const int childrenPos, const int attributesPos, const int probability,
- const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos,
+ const int attributesPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability,
- isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
+ mDicNodes.back().initAsChild(dicNode, pos, childrenPos, attributesPos, probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mergedNodeCodePoints);
}
DicNode *operator[](const int id) {
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
index f48386bba..5d14a0554 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
@@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
// The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older
// implementations. On this occasion, we made the magic number 32 bits long.
-const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
+const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
@@ -46,25 +46,28 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
}
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
- case FORMAT_VERSION_2_MAGIC_NUMBER:
- // Version 2 dictionaries are at least 12 bytes long.
- // If this dictionary has the version 2 magic number but is less than 12 bytes long,
- // then it's an unknown format and we need to avoid confidently reading the next bytes.
- if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) {
+ case HEADER_VERSION_2_MAGIC_NUMBER:
+ // Version 2 headers are at least 12 bytes long.
+ // If this header has the version 2 magic number but is less than 12 bytes long,
+ // then it's an unknown format and we need to avoid confidently reading the next bytes.
+ if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
+ return UNKNOWN_VERSION;
+ }
+ // Version 2 header is as follows:
+ // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
+ // Version number (2 bytes)
+ // Options (2 bytes)
+ // Header size (4 bytes) : integer, big endian
+ if (ByteArrayUtils::readUint16(dict, 4) == 2) {
+ return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
+ // TODO: Support version 3 dictionary.
+ return UNKNOWN_VERSION;
+ } else {
+ return UNKNOWN_VERSION;
+ }
+ default:
return UNKNOWN_VERSION;
- }
- // Format 2 header is as follows:
- // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
- // Version number (2 bytes) 0x00 0x02
- // Options (2 bytes)
- // Header size (4 bytes) : integer, big endian
- if (ByteArrayUtils::readUint16(dict, 4) == 2) {
- return VERSION_2;
- } else {
- return UNKNOWN_VERSION;
- }
- default:
- return UNKNOWN_VERSION;
}
}
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
index 80067b255..830684c70 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
@@ -33,9 +33,9 @@ namespace latinime {
*/
class BinaryDictionaryFormatUtils {
public:
- // TODO: Support version 3 format.
enum FORMAT_VERSION {
- VERSION_2 = 1,
+ VERSION_2,
+ VERSION_3,
UNKNOWN_VERSION
};
@@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
- static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER;
- static const int FORMAT_VERSION_2_MINIMUM_SIZE;
+ static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
+ static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
index c4c4bedde..a57b0f859 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -26,10 +26,10 @@ namespace latinime {
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
@@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_2:
+ switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
+ case HEADER_VERSION_2:
// See the format of the header in the comment in
// BinaryDictionaryFormatUtils::detectFormatVersion()
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
- + VERSION_2_DICTIONARY_FLAG_SIZE);
+ VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ + VERSION_2_HEADER_FLAG_SIZE);
default:
return S_INT_MAX;
}
@@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::getFlags(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_2:
+ switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
+ case HEADER_VERSION_2:
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE);
+ VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
default:
return NO_FLAGS;
}
@@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const char *const key, int *outValue, const int outValueSize) {
- if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
+ if (outValueSize <= 0) {
return false;
}
const int headerSize = getHeaderSize(binaryDictionaryInfo);
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return false;
+ }
while (pos < headerSize) {
if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
index 94b9e124d..61748227e 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
@@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils {
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
}
- static AK_FORCE_INLINE bool hasHeaderAttributes(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
- // Only format 2 and above have header attributes as {key,value} string pairs.
- switch (format) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return true;
- break;
- default:
- return false;
- }
- }
-
static AK_FORCE_INLINE int getHeaderOptionsPosition(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
- switch (format) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
- + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
+ switch (getHeaderVersion(dictionaryFormat)) {
+ case HEADER_VERSION_2:
+ return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
break;
default:
- return 0;
+ return NOT_A_DICT_POS;
}
}
@@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
- static const int VERSION_2_MAGIC_NUMBER_SIZE;
- static const int VERSION_2_DICTIONARY_VERSION_SIZE;
- static const int VERSION_2_DICTIONARY_FLAG_SIZE;
- static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+ enum HEADER_VERSION {
+ HEADER_VERSION_2,
+ UNKNOWN_HEADER_VERSION
+ };
+
+ static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
+ static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
+ static const int VERSION_2_HEADER_FLAG_SIZE;
+ static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
static const DictionaryFlags NO_FLAGS;
// Flags for special processing
@@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils {
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+
+ static HEADER_VERSION getHeaderVersion(
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
+ switch(formatVersion) {
+ case BinaryDictionaryFormatUtils::VERSION_2:
+ // Fall through
+ case BinaryDictionaryFormatUtils::VERSION_3:
+ return HEADER_VERSION_2;
+ default:
+ return UNKNOWN_HEADER_VERSION;
+ }
+ }
};
}
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
index a8520b1f1..0da6504eb 100644
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
@@ -21,7 +21,6 @@
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-#include "suggest/core/dictionary/binary_format.h"
namespace latinime {
@@ -71,13 +70,12 @@ class TerminalAttributes {
};
TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const uint8_t nodeFlags, const int shortcutPos)
- : mBinaryDictionaryInfo(binaryDictionaryInfo),
- mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
+ const int shortcutPos)
+ : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {}
inline ShortcutIterator getShortcutIterator() const {
int shortcutPos = mShortcutListSizePos;
- const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
+ const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS;
if (hasShortcutList) {
BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
mBinaryDictionaryInfo, &shortcutPos);
@@ -86,14 +84,9 @@ class TerminalAttributes {
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
}
- bool isBlacklistedOrNotAWord() const {
- return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
- }
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const uint8_t mNodeFlags;
const int mShortcutListSizePos;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 9a0f10cd5..c6da6f003 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -36,7 +36,6 @@ namespace latinime {
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
-const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
/**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -149,8 +148,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
&doubleLetterTerminalIndex, &doubleLetterLevel);
int maxScore = S_INT_MIN;
- int bestExactMatchedNodeTerminalIndex = -1;
- int bestExactMatchedNodeOutputWordIndex = -1;
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
// a long multiple words suggestion.
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
@@ -173,8 +170,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost;
- const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
- terminalDicNode->getFlags(), terminalDicNode->getAttributesPos());
const bool isPossiblyOffensiveWord = terminalDicNode->getProbability() <= 0;
const bool isExactMatch = terminalDicNode->isExactMatch();
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
@@ -187,14 +182,15 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
| (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
// Entries that are blacklisted or do not represent a word should not be output.
- const bool isValidWord = !terminalAttributes.isBlacklistedOrNotAWord();
+ const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
// Increase output score of top typing suggestion to ensure autocorrection.
// TODO: Better integration with java side autocorrection logic.
const int finalScore = SCORING->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
- (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
- || (isValidWord && SCORING->doesAutoCorrectValidWord()));
+ terminalDicNode->isExactMatch()
+ || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
+ || (isValidWord && SCORING->doesAutoCorrectValidWord()));
maxScore = max(maxScore, finalScore);
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
@@ -207,25 +203,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore;
- if (isSafeExactMatch) {
- // Demote exact matches that are not the highest probable node among all exact
- // matches.
- const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
- || terminals[bestExactMatchedNodeTerminalIndex].getProbability()
- < terminalDicNode->getProbability();
- const int outputWordIndexToBeDemoted = isBestTerminal ?
- bestExactMatchedNodeOutputWordIndex : outputWordIndex;
- if (outputWordIndexToBeDemoted >= 0) {
- frequencies[outputWordIndexToBeDemoted] -=
- FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
- }
- if (isBestTerminal) {
- // Updates the best exact matched node index.
- bestExactMatchedNodeTerminalIndex = terminalIndex;
- // Updates the best exact matched output word index.
- bestExactMatchedNodeOutputWordIndex = outputWordIndex;
- }
- }
// Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
@@ -233,6 +210,8 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
}
if (!terminalDicNode->hasMultipleWords()) {
+ const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
+ terminalDicNode->getAttributesPos());
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 752bde9ac..875cbe4e0 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -82,8 +82,6 @@ class Suggest : public SuggestInterface {
// Threshold for autocorrection classifier
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
- // Final score penalty to exact match words that are not the most probable exact match.
- static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
const Traversal *const TRAVERSAL;
const Scoring *const SCORING;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
index c0e24fa4e..70dad67e8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
@@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory {
switch (dictionaryFormat) {
case BinaryDictionaryFormatUtils::VERSION_2:
return PatriciaTriePolicy::getInstance();
+ case BinaryDictionaryFormatUtils::VERSION_3:
+ // TODO: support version 3 dictionaries.
+ return 0;
default:
ASSERT(false);
return 0;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index e098f353e..830aa80de 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -169,12 +169,7 @@ class TypingWeighting : public Weighting {
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
- // We promote exact matches here to prevent them from being pruned. The final score of
- // exact match nodes might be demoted later in Suggest::outputSuggestions if there are
- // multiple exact matches.
- const float languageImprobability = (dicNode->isExactMatch()) ?
- 0.0f : dicNodeLanguageImprobability;
- return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
+ return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {