aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/Android.mk2
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h16
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp)31
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h (renamed from native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h)66
-rw-r--r--native/jni/src/suggest/core/suggest.cpp23
-rw-r--r--native/jni/src/suggest/core/suggest.h2
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
7 files changed, 84 insertions, 59 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index fb60139d3..d5df6b62e 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -53,10 +53,10 @@ LATIN_IME_CORE_SRC_FILES := \
dic_nodes_cache.cpp) \
$(addprefix suggest/core/dictionary/, \
bigram_dictionary.cpp \
- binary_dictionary_bigrams_reading_utils.cpp \
binary_dictionary_format_utils.cpp \
binary_dictionary_header.cpp \
binary_dictionary_header_reading_utils.cpp \
+ binary_dictionary_terminal_attributes_reading_utils.cpp \
bloom_filter.cpp \
byte_array_utils.cpp \
dictionary.cpp \
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
index 0856840b2..f2b48e960 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h
@@ -18,8 +18,8 @@
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
namespace latinime {
@@ -35,15 +35,17 @@ class BinaryDictionaryBigramsIterator {
}
AK_FORCE_INLINE void next() {
- mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
+ mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
mBinaryDictionaryInfo, &mPos);
- mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
- mBinaryDictionaryInfo, mBigramFlags, &mPos);
- mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags);
+ mBigramPos =
+ BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
+ mBinaryDictionaryInfo, mBigramFlags, &mPos);
+ mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
}
AK_FORCE_INLINE int getProbability() const {
- return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
+ return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(
+ mBigramFlags);
}
AK_FORCE_INLINE int getBigramPos() const {
@@ -59,7 +61,7 @@ class BinaryDictionaryBigramsIterator {
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int mPos;
- BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
+ BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
int mBigramPos;
bool mHasNext;
};
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
index 78a54b141..0a7509c8b 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
@@ -14,33 +14,28 @@
* limitations under the License.
*/
-#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/byte_array_utils.h"
namespace latinime {
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
+typedef BinaryDictionaryTerminalAttributesReadingUtils TaUtils;
+
+const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
// Flag for presence of more attributes
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
+const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
// Mask for attribute probability, stored on 4 bits inside the flags byte.
-const BinaryDictionaryBigramsReadingUtils::BigramFlags
- BinaryDictionaryBigramsReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int BinaryDictionaryBigramsReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
-/* static */ int BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
+/* static */ int TaUtils::getBigramAddressAndForwardPointer(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
int *const pos) {
int offset = 0;
const int origin = *pos;
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
index e71f2a17a..f38fd5aaa 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
-#define LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H
+#ifndef LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
+#define LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H
#include <stdint.h>
@@ -25,55 +25,57 @@
namespace latinime {
-class BinaryDictionaryBigramsReadingUtils {
+class BinaryDictionaryTerminalAttributesReadingUtils {
public:
- typedef uint8_t BigramFlags;
+ typedef uint8_t TerminalAttributeFlags;
+ typedef TerminalAttributeFlags BigramFlags;
- static AK_FORCE_INLINE void skipExistingBigrams(
- const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
- BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
- while (hasNext(flags)) {
- *pos += attributeAddressSize(flags);
- flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
- }
- *pos += attributeAddressSize(flags);
- }
-
- static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer(
+ static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
return ByteArrayUtils::readUint8andAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos);
}
- static AK_FORCE_INLINE int getBigramProbability(const BigramFlags flags) {
+ static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
return flags & MASK_ATTRIBUTE_PROBABILITY;
}
- static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
- return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
+ static AK_FORCE_INLINE bool hasNext(const TerminalAttributeFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
}
- static AK_FORCE_INLINE bool hasNext(const BigramFlags flags) {
- return (flags & FLAG_ATTRIBUTE_HAS_NEXT) != 0;
+ // Bigrams reading methods
+ static AK_FORCE_INLINE void skipExistingBigrams(
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+ BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
+ while (hasNext(flags)) {
+ *pos += attributeAddressSize(flags);
+ flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
+ }
+ *pos += attributeAddressSize(flags);
}
static int getBigramAddressAndForwardPointer(
- const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const BigramFlags flags, int *const pos);
+ const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
+ int *const pos);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryBigramsReadingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
- static const BigramFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
- static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- static const BigramFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
- static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const TerminalAttributeFlags MASK_ATTRIBUTE_ADDRESS_TYPE;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
+ static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT;
- static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+ static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
+ return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
+ }
+
+ static AK_FORCE_INLINE int attributeAddressSize(const TerminalAttributeFlags flags) {
return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
/* Note: this is a value-dependant optimization of what may probably be
more readably written this way:
@@ -87,4 +89,4 @@ class BinaryDictionaryBigramsReadingUtils {
}
};
}
-#endif /* LATINIME_BINARY_DICTIONARY_BIGRAM_READING_UTILS_H */
+#endif /* LATINIME_BINARY_DICTIONARY_TERMINAL_ATTRIBUTES_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index a8f16c8cb..173a612be 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -36,6 +36,7 @@ namespace latinime {
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
+const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
/**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -148,6 +149,8 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
&doubleLetterTerminalIndex, &doubleLetterLevel);
int maxScore = S_INT_MIN;
+ int bestExactMatchedNodeTerminalIndex = -1;
+ int bestExactMatchedNodeOutputWordIndex = -1;
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
++terminalIndex) {
@@ -186,7 +189,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
const int finalScore = SCORING->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord()));
-
maxScore = max(maxScore, finalScore);
if (TRAVERSAL->allowPartialCommit()) {
@@ -200,6 +202,25 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore;
+ if (isSafeExactMatch) {
+ // Demote exact matches that are not the highest probable node among all exact
+ // matches.
+ const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
+ || terminals[bestExactMatchedNodeTerminalIndex].getProbability()
+ < terminalDicNode->getProbability();
+ const int outputWordIndexToBeDemoted = isBestTerminal ?
+ bestExactMatchedNodeOutputWordIndex : outputWordIndex;
+ if (outputWordIndexToBeDemoted >= 0) {
+ frequencies[outputWordIndexToBeDemoted] -=
+ FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
+ }
+ if (isBestTerminal) {
+ // Updates the best exact matched node index.
+ bestExactMatchedNodeTerminalIndex = terminalIndex;
+ // Updates the best exact matched output word index.
+ bestExactMatchedNodeOutputWordIndex = outputWordIndex;
+ }
+ }
// Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 875cbe4e0..752bde9ac 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -82,6 +82,8 @@ class Suggest : public SuggestInterface {
// Threshold for autocorrection classifier
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
+ // Final score penalty to exact match words that are not the most probable exact match.
+ static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
const Traversal *const TRAVERSAL;
const Scoring *const SCORING;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 7333dbe0d..e098f353e 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -169,6 +169,9 @@ class TypingWeighting : public Weighting {
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
+ // We promote exact matches here to prevent them from being pruned. The final score of
+ // exact match nodes might be demoted later in Suggest::outputSuggestions if there are
+ // multiple exact matches.
const float languageImprobability = (dicNode->isExactMatch()) ?
0.0f : dicNodeLanguageImprobability;
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;