aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk3
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp6
-rw-r--r--native/jni/src/defines.h76
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h61
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_properties.h32
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h58
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp91
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h26
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h13
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp53
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h8
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.h14
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp38
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h49
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_info.h37
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/terminal_attributes.h22
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_utils.h4
-rw-r--r--native/jni/src/suggest/core/policy/traversal.h2
-rw-r--r--native/jni/src/suggest/core/suggest.cpp75
-rw-r--r--native/jni/src/suggest/core/suggest.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h5
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h5
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h7
-rw-r--r--native/jni/src/utils/log_utils.cpp72
-rw-r--r--native/jni/src/utils/log_utils.h37
27 files changed, 478 insertions, 328 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index f89eea735..d78da969b 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -79,8 +79,9 @@ LATIN_IME_CORE_SRC_FILES := \
typing_traversal.cpp \
typing_weighting.cpp) \
$(addprefix utils/, \
+ autocorrection_threshold_utils.cpp \
char_utils.cpp \
- autocorrection_threshold_utils.cpp)
+ log_utils.cpp)
LOCAL_SRC_FILES := \
$(LATIN_IME_JNI_SRC_FILES) \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index a93bbeb8c..6e1b80ee0 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -93,8 +93,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
AKLOGE("DICT: dictionary format is unknown, bad magic number");
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
} else {
- dictionary = new Dictionary(
- dictBuf, static_cast<int>(dictSize), fd, offset, updatableMmap);
+ dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset,
+ updatableMmap);
}
PROF_END(66);
PROF_CLOSE;
@@ -201,7 +201,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) return 0;
+ if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index cb6681456..607a74400 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -35,46 +35,74 @@
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
+#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
-#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
-#include <android/log.h>
-#ifndef LOG_TAG
-#define LOG_TAG "LatinIME: "
-#endif // LOG_TAG
-#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
-#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
-
-#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
-#define INTS_TO_CHARS(input, length, output) do { \
- intArrayToCharArray(input, length, output); } while (0)
-
-// TODO: Support full UTF-8 conversion
-AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize,
- char *dest) {
+AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
+ char *dest, const int destSize) {
+ // Encodes source as UTF-8 into dest, stopping at a 0 code point, sourceSize or a full buffer.
+ // One byte is always kept free for the 0 terminator; returns the byte count without it.
+ const int destLimit = destSize - 1;
int si = 0;
int di = 0;
- while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) {
+ while (si < sourceSize && di < destLimit && 0 != source[si]) {
const int codePoint = source[si++];
- if (codePoint < 0x7F) {
+ if (codePoint <= 0x7F) { // One byte (U+007F is still a single byte)
dest[di++] = codePoint;
- } else if (codePoint < 0x7FF) {
+ } else if (codePoint <= 0x7FF) { // Two bytes (inclusive bound avoids an overlong form)
+ if (di + 1 >= destLimit) break;
dest[di++] = 0xC0 + (codePoint >> 6);
dest[di++] = 0x80 + (codePoint & 0x3F);
- } else if (codePoint < 0xFFFF) {
+ } else if (codePoint <= 0xFFFF) { // Three bytes (inclusive bound avoids an overlong form)
+ if (di + 2 >= destLimit) break;
dest[di++] = 0xE0 + (codePoint >> 12);
- dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0x1FFFFF) { // Four bytes
+ if (di + 3 >= destLimit) break;
+ dest[di++] = 0xF0 + (codePoint >> 18);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0x3FFFFFF) { // Five bytes
+ if (di + 4 >= destLimit) break;
+ dest[di++] = 0xF8 + (codePoint >> 24);
+ dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = 0x80 + (codePoint & 0x3F); // trailing bytes are always 10xxxxxx
+ } else if (codePoint <= 0x7FFFFFFF) { // Six bytes
+ if (di + 5 >= destLimit) break;
+ dest[di++] = 0xFC + (codePoint >> 30);
+ dest[di++] = 0x80 + ((codePoint >> 24) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = 0x80 + (codePoint & 0x3F); // trailing bytes are always 10xxxxxx
+ } else {
+ // Not a code point... skip.
}
}
dest[di] = 0;
return di;
}
+#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#include <android/log.h>
+#ifndef LOG_TAG
+#define LOG_TAG "LatinIME: "
+#endif // LOG_TAG
+#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
+#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
+
+#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
+#define INTS_TO_CHARS(input, length, output, outlength) do { \
+ intArrayToCharArray(input, length, output, outlength); } while (0)
+
static inline void dumpWordInfo(const int *word, const int length, const int rank,
const int probability) {
static char charBuf[50];
- const int N = intArrayToCharArray(word, length, charBuf);
+ const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) {
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
}
@@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) {
static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
static char charBuf[50];
- const int N = intArrayToCharArray(word, length, charBuf);
+ const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) {
AKLOGI("[ %s ]", charBuf);
}
@@ -304,8 +332,6 @@ static inline void prof_out(void) {
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
-#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
-
// DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 52db8e9c7..973da67e4 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -28,15 +28,16 @@
#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
#define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
+ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
+ NELEMS(prevWordCharBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
@@ -111,32 +112,23 @@ class DicNode {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
+ NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
- false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
- 0 /* terminalDepth */);
+ false /* isTerminal */, true /* hasChildren */,
+ false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
mDicNodeState.init(prevWordNodePos);
PROF_NODE_RESET(mProfiler);
}
- void initAsPassingChild(DicNode *parentNode) {
- mIsUsed = true;
- mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
- const int c = parentNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
- mDicNodeState.init(&parentNode->mDicNodeState);
- PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
- }
-
// Init for root with previous word
void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
+ NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */,
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
- false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
- 0 /* terminalDepth */);
+ false /* isTerminal */, true /* hasChildren */,
+ false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */);
// TODO: Move to dicNodeState?
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
mDicNodeState.mDicNodeStateInput.init(
@@ -156,18 +148,27 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // TODO: minimize arguments by looking binary_format
- void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
+ void initAsPassingChild(DicNode *parentNode) {
+ mIsUsed = true;
+ mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
+ const int c = parentNode->getNodeTypedCodePoint();
+ mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
+ mDicNodeState.init(&parentNode->mDicNodeState);
+ PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+ }
+
+ void initAsChild(DicNode *dicNode, const int pos, const int childrenPos,
const int attributesPos, const int probability, const bool isTerminal,
- const bool hasChildren, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0],
- probability, isTerminal, hasChildren, newDepth, newLeavingDepth);
+ mDicNodeProperties.init(pos, childrenPos, attributesPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -215,7 +216,7 @@ class DicNode {
}
bool isImpossibleBigramWord() const {
- if (mDicNodeProperties.hasBlacklistedOrNotAWordFlag()) {
+ if (isBlacklistedOrNotAWord()) {
return true;
}
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
@@ -462,8 +463,8 @@ class DicNode {
return mDicNodeState.mDicNodeStateScoring.isExactMatch();
}
- uint8_t getFlags() const {
- return mDicNodeProperties.getFlags();
+ bool isBlacklistedOrNotAWord() const {
+ return mDicNodeProperties.isBlacklistedOrNotAWord();
}
int getAttributesPos() const {
@@ -503,6 +504,12 @@ class DicNode {
if (!right->isUsed()) {
return false;
}
+ // Promote exact matches to prevent them from being pruned.
+ const bool leftExactMatch = isExactMatch();
+ const bool rightExactMatch = right->isExactMatch();
+ if (leftExactMatch != rightExactMatch) {
+ return leftExactMatch;
+ }
const float diff =
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
static const float MIN_DIFF = 0.000001f;
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/dic_node_properties.h
index 7e8aa4979..d98000d83 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_properties.h
@@ -20,7 +20,6 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/binary_format.h"
namespace latinime {
@@ -32,24 +31,25 @@ namespace latinime {
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
- mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false),
- mHasChildren(false) {}
+ : mPos(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
+ mNodeCodePoint(0), mIsTerminal(false), mHasChildren(false),
+ mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
// Should be called only once per DicNode is initialized.
- void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
+ void init(const int pos, const int childrenPos, const int attributesPos,
const int nodeCodePoint, const int probability, const bool isTerminal,
- const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) {
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t depth, const uint16_t leavingDepth) {
mPos = pos;
- mFlags = flags;
mChildrenPos = childrenPos;
mAttributesPos = attributesPos;
mNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
mHasChildren = hasChildren;
+ mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
@@ -57,13 +57,13 @@ class DicNodeProperties {
// Init for copy
void init(const DicNodeProperties *const nodeProp) {
mPos = nodeProp->mPos;
- mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
mNodeCodePoint = nodeProp->mNodeCodePoint;
mProbability = nodeProp->mProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasChildren = nodeProp->mHasChildren;
+ mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
mDepth = nodeProp->mDepth;
mLeavingDepth = nodeProp->mLeavingDepth;
}
@@ -71,13 +71,13 @@ class DicNodeProperties {
// Init as passing child
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
mPos = nodeProp->mPos;
- mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mProbability = nodeProp->mProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasChildren = nodeProp->mHasChildren;
+ mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = nodeProp->mLeavingDepth;
}
@@ -86,10 +86,6 @@ class DicNodeProperties {
return mPos;
}
- uint8_t getFlags() const {
- return mFlags;
- }
-
int getChildrenPos() const {
return mChildrenPos;
}
@@ -123,8 +119,8 @@ class DicNodeProperties {
return mHasChildren || mDepth != mLeavingDepth;
}
- bool hasBlacklistedOrNotAWordFlag() const {
- return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
+ bool isBlacklistedOrNotAWord() const {
+ return mIsBlacklistedOrNotAWord;
}
private:
@@ -132,15 +128,15 @@ class DicNodeProperties {
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
int mPos;
- uint8_t mFlags;
int mChildrenPos;
int mAttributesPos;
int mProbability;
int mNodeCodePoint;
- uint16_t mDepth;
- uint16_t mLeavingDepth;
bool mIsTerminal;
bool mHasChildren;
+ bool mIsBlacklistedOrNotAWord;
+ uint16_t mDepth;
+ uint16_t mLeavingDepth;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h
new file mode 100644
index 000000000..1a39f2ef3
--- /dev/null
+++ b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_PROXIMITY_FILTER_H
+#define LATINIME_DIC_NODE_PROXIMITY_FILTER_H
+
+#include "defines.h"
+#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/layout/proximity_info_utils.h"
+#include "suggest/core/policy/dictionary_structure_policy.h"
+
+namespace latinime {
+// NodeFilter accepting only code points that match one point index of a ProximityInfoState.
+class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter {
+ public:
+ DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
+ const int pointIndex, const bool exactOnly)
+ : mProximityInfoState(pInfoState), mPointIndex(pointIndex), mExactOnly(exactOnly) {}
+ // Returns true when codePoint is rejected by isProximityCodePoint().
+ bool isFilteredOut(const int codePoint) const {
+ return !isProximityCodePoint(codePoint);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeProximityFilter);
+
+ const ProximityInfoState *const mProximityInfoState; // null disables filtering
+ const int mPointIndex; // index passed to the ProximityInfoState lookups
+ const bool mExactOnly; // when true, only the primary code point is accepted
+
+ // Acceptance test behind isFilteredOut(). TODO: Move to proximity info state
+ bool isProximityCodePoint(const int codePoint) const {
+ if (!mProximityInfoState) {
+ return true; // no proximity info: accept every code point
+ }
+ if (mExactOnly) { // exact mode: must equal the primary code point at mPointIndex
+ return mProximityInfoState->getPrimaryCodePointAt(mPointIndex) == codePoint;
+ }
+ const ProximityType matchedId = mProximityInfoState->getProximityType(
+ mPointIndex, codePoint, true /* checkProximityChars */);
+ return ProximityInfoUtils::isMatchOrProximityChar(matchedId);
+ }
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_PROXIMITY_FILTER_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 9bf7eceb5..6c7f6667a 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -14,18 +14,17 @@
* limitations under the License.
*/
+#include "suggest/core/dicnode/dic_node_utils.h"
+
#include <cstring>
-#include <vector>
#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dicnode/dic_node_proximity_filter.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h"
-#include "suggest/core/layout/proximity_info.h"
-#include "suggest/core/layout/proximity_info_state.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "utils/char_utils.h"
@@ -57,21 +56,20 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
+ const DicNodeProximityFilter *const childrenFilter,
DicNodeVector *childDicNodes) {
// Passing multiple chars node. No need to traverse child
const int codePoint = dicNode->getNodeTypedCodePoint();
const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint);
- const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
- if (isMatch || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
+ if (!childrenFilter->isFilteredOut(codePoint)
+ || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
childDicNodes->pushPassingChild(dicNode);
}
}
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
- const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
+ const DicNodeProximityFilter *const childrenFilter,
DicNodeVector *childDicNodes) {
int nextPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
@@ -80,6 +78,7 @@ namespace latinime {
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
const bool hasShortcuts = (0 != (BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS & flags));
+ const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &pos);
@@ -110,50 +109,18 @@ namespace latinime {
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
binaryDictionaryInfo->getDictRoot(), flags, pos);
- if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) {
- return siblingPos;
- }
- if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) {
+ if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
return siblingPos;
}
- childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
- probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
+ childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, attributesPos,
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord,
+ mergedNodeCodePointCount, mergedNodeCodePoints);
return siblingPos;
}
-/* static */ bool DicNodeUtils::isDicNodeFilteredOut(const int nodeCodePoint,
- const ProximityInfo *const pInfo, const std::vector<int> *const codePointsFilter) {
- const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
- if (filterSize <= 0) {
- return false;
- }
- if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
- || CharUtils::isIntentionalOmissionCodePoint(nodeCodePoint))) {
- // If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
- // filtered.
- return false;
- }
- const int lowerCodePoint = CharUtils::toLowerCase(nodeCodePoint);
- const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
- // TODO: Avoid linear search
- for (int i = 0; i < filterSize; ++i) {
- // Checking if a normalized code point is in filter characters when pInfo is not
- // null. When pInfo is null, nodeCodePoint is used to check filtering without
- // normalizing.
- if ((pInfo && ((*codePointsFilter)[i] == lowerCodePoint
- || (*codePointsFilter)[i] == baseLowerCodePoint))
- || (!pInfo && (*codePointsFilter)[i] == nodeCodePoint)) {
- return false;
- }
- }
- return true;
-}
-
/* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
- const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
- DicNodeVector *childDicNodes) {
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) {
if (!dicNode->hasChildren()) {
return;
}
@@ -161,14 +128,8 @@ namespace latinime {
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) {
- const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
- pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
- childDicNodes);
- if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
- // All code points have been found.
- break;
- }
+ childrenFilter, childDicNodes);
}
}
@@ -184,13 +145,12 @@ namespace latinime {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
+ const DicNodeProximityFilter childrenFilter(pInfoState, pointIndex, exactOnly);
if (!dicNode->isLeavingNode()) {
- DicNodeUtils::createAndGetPassingChildNode(dicNode, pInfoState, pointIndex, exactOnly,
- childDicNodes);
+ DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
} else {
DicNodeUtils::createAndGetAllLeavingChildNodes(
- dicNode, binaryDictionaryInfo, pInfoState, pointIndex, exactOnly,
- 0 /* codePointsFilter */, 0 /* pInfo */, childDicNodes);
+ dicNode, binaryDictionaryInfo, &childrenFilter, childDicNodes);
}
}
@@ -230,23 +190,6 @@ namespace latinime {
return ProbabilityUtils::backoff(unigramProbability);
}
-///////////////////////////////////////
-// Bigram / Unigram dictionary utils //
-///////////////////////////////////////
-
-/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
- const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
- if (!pInfoState) {
- return true;
- }
- if (exactOnly) {
- return pInfoState->getPrimaryCodePointAt(pointIndex) == nodeCodePoint;
- }
- const ProximityType matchedId = pInfoState->getProximityType(pointIndex, nodeCodePoint,
- true /* checkProximityChars */);
- return isProximityChar(matchedId);
-}
-
////////////////
// Char utils //
////////////////
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index d526975ce..7b567b582 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -18,7 +18,6 @@
#define LATINIME_DIC_NODE_UTILS_H
#include <stdint.h>
-#include <vector>
#include "defines.h"
@@ -26,8 +25,8 @@ namespace latinime {
class BinaryDictionaryInfo;
class DicNode;
+class DicNodeProximityFilter;
class DicNodeVector;
-class ProximityInfo;
class ProximityInfoState;
class MultiBigramMap;
@@ -44,19 +43,12 @@ class DicNodeUtils {
const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const DicNode *const node, MultiBigramMap *const multiBigramMap);
- static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
- const std::vector<int> *const codePointsFilter);
// TODO: Move to private
static void getProximityChildDicNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
DicNodeVector *childDicNodes);
- // TODO: Move to proximity info
- static bool isProximityChar(ProximityType type) {
- return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
- }
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
// Max number of bigrams to look up
@@ -64,22 +56,14 @@ class DicNodeUtils {
static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const DicNode *const node, MultiBigramMap *multiBigramMap);
- static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState,
- const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes);
+ static void createAndGetPassingChildNode(DicNode *dicNode,
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
static void createAndGetAllLeavingChildNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
- const std::vector<int> *const codePointsFilter,
- const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex,
- const bool exactOnly, const std::vector<int> *const codePointsFilter,
- const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
-
- // TODO: Move to proximity info
- static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex,
- const bool exactOnly, const int nodeCodePoint);
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 9641cc19c..5ac4eeaf4 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -62,14 +62,15 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
- const int childrenPos, const int attributesPos, const int probability,
- const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos,
+ const int attributesPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability,
- isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
+ mDicNodes.back().initAsChild(dicNode, pos, childrenPos, attributesPos, probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mergedNodeCodePoints);
}
DicNode *operator[](const int id) {
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
index bbb4ca3f0..5d14a0554 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp
@@ -27,17 +27,13 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
/**
* Format versions
*/
-// Originally, format version 1 had a 16-bit magic number, then the version number `01'
-// then options that must be 0. Hence the first 32-bits of the format are always as follow
-// and it's okay to consider them a magic number as a whole.
-const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
// The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older
// implementations. On this occasion, we made the magic number 32 bits long.
-const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
-const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
+const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
@@ -50,31 +46,28 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
}
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) {
- case FORMAT_VERSION_1_MAGIC_NUMBER:
- // Format 1 header is exactly 5 bytes long and looks like:
- // Magic number (2 bytes) 0x78 0xB1
- // Version number (1 byte) 0x01
- // Options (2 bytes) must be 0x00 0x00
- return VERSION_1;
- case FORMAT_VERSION_2_MAGIC_NUMBER:
- // Version 2 dictionaries are at least 12 bytes long.
- // If this dictionary has the version 2 magic number but is less than 12 bytes long,
- // then it's an unknown format and we need to avoid confidently reading the next bytes.
- if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) {
+ case HEADER_VERSION_2_MAGIC_NUMBER:
+ // Version 2 headers are at least 12 bytes long.
+ // If this header has the version 2 magic number but is less than 12 bytes long,
+ // then it's an unknown format and we need to avoid confidently reading the next bytes.
+ if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
+ return UNKNOWN_VERSION;
+ }
+ // Version 2 header is as follows:
+ // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
+ // Version number (2 bytes)
+ // Options (2 bytes)
+ // Header size (4 bytes) : integer, big endian
+ if (ByteArrayUtils::readUint16(dict, 4) == 2) {
+ return VERSION_2;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
+ // TODO: Support version 3 dictionary.
+ return UNKNOWN_VERSION;
+ } else {
+ return UNKNOWN_VERSION;
+ }
+ default:
return UNKNOWN_VERSION;
- }
- // Format 2 header is as follows:
- // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
- // Version number (2 bytes) 0x00 0x02
- // Options (2 bytes)
- // Header size (4 bytes) : integer, big endian
- if (ByteArrayUtils::readUint16(dict, 4) == 2) {
- return VERSION_2;
- } else {
- return UNKNOWN_VERSION;
- }
- default:
- return UNKNOWN_VERSION;
}
}
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
index 33618b9f0..830684c70 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h
@@ -33,10 +33,9 @@ namespace latinime {
*/
class BinaryDictionaryFormatUtils {
public:
- // TODO: Remove obsolete version logic
enum FORMAT_VERSION {
- VERSION_1,
VERSION_2,
+ VERSION_3,
UNKNOWN_VERSION
};
@@ -46,9 +45,8 @@ class BinaryDictionaryFormatUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
static const int DICTIONARY_MINIMUM_SIZE;
- static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER;
- static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER;
- static const int FORMAT_VERSION_2_MINIMUM_SIZE;
+ static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
+ static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
index 6dba0b266..240512bce 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
@@ -53,6 +53,20 @@ class BinaryDictionaryHeader {
return mMultiWordCostMultiplier;
}
+ AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key,
+ int *outValue, int outValueSize) const {
+ if (outValueSize <= 0) return;
+ if (outValueSize == 1) {
+ outValue[0] = '\0';
+ return;
+ }
+ if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
+ key, outValue, outValueSize)) {
+ outValue[0] = '?';
+ outValue[1] = '\0';
+ }
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
index 2c9593144..a57b0f859 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -26,12 +26,10 @@ namespace latinime {
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
-const int BinaryDictionaryHeaderReadingUtils::FORMAT_VERSION_1_HEADER_SIZE = 5;
-
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
-const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
+const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
@@ -47,15 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_1:
- return FORMAT_VERSION_1_HEADER_SIZE;
- case BinaryDictionaryFormatUtils::VERSION_2:
+ switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
+ case HEADER_VERSION_2:
// See the format of the header in the comment in
// BinaryDictionaryFormatUtils::detectFormatVersion()
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
- + VERSION_2_DICTIONARY_FLAG_SIZE);
+ VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ + VERSION_2_HEADER_FLAG_SIZE);
default:
return S_INT_MAX;
}
@@ -64,12 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::getFlags(
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
- switch (binaryDictionaryInfo->getFormat()) {
- case BinaryDictionaryFormatUtils::VERSION_1:
- return NO_FLAGS;
- case BinaryDictionaryFormatUtils::VERSION_2:
+ switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
+ case HEADER_VERSION_2:
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
- VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE);
+ VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
default:
return NO_FLAGS;
}
@@ -79,17 +73,23 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const char *const key, int *outValue, const int outValueSize) {
- if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
+ if (outValueSize <= 0) {
return false;
}
const int headerSize = getHeaderSize(binaryDictionaryInfo);
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
+ if (pos == NOT_A_DICT_POS) {
+ // The header doesn't have header options.
+ return false;
+ }
while (pos < headerSize) {
if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
// The key was found.
- ByteArrayUtils::readStringAndAdvancePosition(
+ const int length = ByteArrayUtils::readStringAndAdvancePosition(
binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
+ // Add a 0 terminator to the string.
+ outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
return true;
}
ByteArrayUtils::advancePositionToBehindString(
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
index 49ed2b9cc..61748227e 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h
@@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils {
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
}
- static AK_FORCE_INLINE bool hasHeaderAttributes(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
- // Only format 2 and above have header attributes as {key,value} string pairs.
- switch (format) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return true;
- break;
- default:
- return false;
- }
- }
-
static AK_FORCE_INLINE int getHeaderOptionsPosition(
- const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
- switch (format) {
- case BinaryDictionaryFormatUtils::VERSION_2:
- return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
- + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
+ switch (getHeaderVersion(dictionaryFormat)) {
+ case HEADER_VERSION_2:
+ return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
break;
default:
- return 0;
+ return NOT_A_DICT_POS;
}
}
@@ -82,12 +70,15 @@ class BinaryDictionaryHeaderReadingUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
- static const int FORMAT_VERSION_1_HEADER_SIZE;
+ enum HEADER_VERSION {
+ HEADER_VERSION_2,
+ UNKNOWN_HEADER_VERSION
+ };
- static const int VERSION_2_MAGIC_NUMBER_SIZE;
- static const int VERSION_2_DICTIONARY_VERSION_SIZE;
- static const int VERSION_2_DICTIONARY_FLAG_SIZE;
- static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
+ static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
+ static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
+ static const int VERSION_2_HEADER_FLAG_SIZE;
+ static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
static const DictionaryFlags NO_FLAGS;
// Flags for special processing
@@ -97,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils {
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
+
+ static HEADER_VERSION getHeaderVersion(
+ const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
+ switch(formatVersion) {
+ case BinaryDictionaryFormatUtils::VERSION_2:
+ // Fall through
+ case BinaryDictionaryFormatUtils::VERSION_3:
+ return HEADER_VERSION_2;
+ default:
+ return UNKNOWN_HEADER_VERSION;
+ }
+ }
};
}
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index 7cb31440a..cbea18f90 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -20,23 +20,27 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
+#include "utils/log_utils.h"
namespace latinime {
class BinaryDictionaryInfo {
public:
- BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
- const int dictBufOffset, const bool isUpdatable)
+ AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
+ const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)),
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
- mDictionaryFormat)) {}
+ mDictionaryFormat)) {
+ logDictionaryInfo(env);
+ }
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -88,6 +92,33 @@ class BinaryDictionaryInfo {
const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
const DictionaryStructurePolicy *const mStructurePolicy;
+
+ AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
+ const int BUFFER_SIZE = 16;
+ int dictionaryIdCodePointBuffer[BUFFER_SIZE];
+ int versionStringCodePointBuffer[BUFFER_SIZE];
+ int dateStringCodePointBuffer[BUFFER_SIZE];
+ mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary",
+ dictionaryIdCodePointBuffer, BUFFER_SIZE);
+ mDictionaryHeader.readHeaderValueOrQuestionMark("version",
+ versionStringCodePointBuffer, BUFFER_SIZE);
+ mDictionaryHeader.readHeaderValueOrQuestionMark("date",
+ dateStringCodePointBuffer, BUFFER_SIZE);
+
+ char dictionaryIdCharBuffer[BUFFER_SIZE];
+ char versionStringCharBuffer[BUFFER_SIZE];
+ char dateStringCharBuffer[BUFFER_SIZE];
+ intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
+ dictionaryIdCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
+ versionStringCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
+ dateStringCharBuffer, BUFFER_SIZE);
+
+ LogUtils::logToJava(env,
+ "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
+ dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize);
+ }
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 675b54972..f520a75b1 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -22,6 +22,7 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -32,8 +33,9 @@
namespace latinime {
-Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable)
- : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd,
+Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
+ int dictBufOffset, bool isUpdatable)
+ : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
dictBufOffset, isUpdatable),
mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 94579c200..1bf24a85b 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -20,6 +20,7 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
namespace latinime {
@@ -52,7 +53,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable);
+ Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
+ bool isUpdatable);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
index cec47081e..0da6504eb 100644
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
@@ -21,7 +21,6 @@
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
-#include "suggest/core/dictionary/binary_format.h"
namespace latinime {
@@ -71,28 +70,23 @@ class TerminalAttributes {
};
TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const uint8_t nodeFlags, const int shortcutPos)
- : mBinaryDictionaryInfo(binaryDictionaryInfo),
- mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
+ const int shortcutPos)
+ : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {}
inline ShortcutIterator getShortcutIterator() const {
- // The size of the shortcuts is stored here so that the whole shortcut chunk can be
- // skipped quickly, so we ignore it.
int shortcutPos = mShortcutListSizePos;
- BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
- mBinaryDictionaryInfo, &shortcutPos);
- const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
+ const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS;
+ if (hasShortcutList) {
+ BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
+ mBinaryDictionaryInfo, &shortcutPos);
+ }
+ // shortcutPos is never used if hasShortcutList is false.
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
}
- bool isBlacklistedOrNotAWord() const {
- return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
- }
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
- const uint8_t mNodeFlags;
const int mShortcutListSizePos;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h
index 54f7539d1..0e28560fc 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h
@@ -117,6 +117,10 @@ class ProximityInfoUtils {
return getSquaredDistanceFloat(x, y, projectionX, projectionY);
}
+ static AK_FORCE_INLINE bool isMatchOrProximityChar(const ProximityType type) {
+ return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
+ }
+
// Normal distribution N(u, sigma^2).
struct NormalDistribution {
public:
diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h
index c6f66f231..f26d7149e 100644
--- a/native/jni/src/suggest/core/policy/traversal.h
+++ b/native/jni/src/suggest/core/policy/traversal.h
@@ -45,7 +45,7 @@ class Traversal {
const DicNode *const dicNode) const = 0;
virtual bool needsToTraverseAllUserInput() const = 0;
virtual float getMaxSpatialDistance() const = 0;
- virtual bool allowPartialCommit() const = 0;
+ virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
virtual int getDefaultExpandDicNodeSize() const = 0;
virtual int getMaxCacheSize() const = 0;
virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 173a612be..c6da6f003 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -36,7 +36,6 @@ namespace latinime {
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
-const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
/**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -85,9 +84,9 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
if (!traverseSession->getProximityInfoState(0)->isUsed()) {
return;
}
- if (TRAVERSAL->allowPartialCommit()) {
- commitPoint = 0;
- }
+
+ // Never auto partial commit for now.
+ commitPoint = 0;
if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
&& traverseSession->isContinuousSuggestionPossible()) {
@@ -149,8 +148,17 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
&doubleLetterTerminalIndex, &doubleLetterLevel);
int maxScore = S_INT_MIN;
- int bestExactMatchedNodeTerminalIndex = -1;
- int bestExactMatchedNodeOutputWordIndex = -1;
+ // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
+ // a long multi-word suggestion.
+ // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+ // traverseSession->isPartiallyCommited() always returns false because we never auto partial
+ // commit for now.
+ const bool forceCommitMultiWords = (terminalSize > 0) ?
+ TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
+ && (traverseSession->isPartiallyCommited()
+ || (traverseSession->getInputSize()
+ >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
+ && terminals[0].hasMultipleWords())) : false;
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
++terminalIndex) {
@@ -162,8 +170,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost;
- const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
- terminalDicNode->getFlags(), terminalDicNode->getAttributesPos());
const bool isPossiblyOffensiveWord = terminalDicNode->getProbability() <= 0;
const bool isExactMatch = terminalDicNode->isExactMatch();
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
@@ -176,60 +182,43 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
| (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
// Entries that are blacklisted or do not represent a word should not be output.
- const bool isValidWord = !terminalAttributes.isBlacklistedOrNotAWord();
+ const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
// Increase output score of top typing suggestion to ensure autocorrection.
// TODO: Better integration with java side autocorrection logic.
- // Force autocorrection for obvious long multi-word suggestions.
- const bool isForceCommitMultiWords = TRAVERSAL->allowPartialCommit()
- && (traverseSession->isPartiallyCommited()
- || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
- && terminalDicNode->hasMultipleWords()));
-
const int finalScore = SCORING->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
- isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord()));
+ terminalDicNode->isExactMatch()
+ || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
+ || (isValidWord && SCORING->doesAutoCorrectValidWord()));
maxScore = max(maxScore, finalScore);
- if (TRAVERSAL->allowPartialCommit()) {
- // Index for top typing suggestion should be 0.
- if (isValidWord && outputWordIndex == 0) {
- terminalDicNode->outputSpacePositionsResult(spaceIndices);
- }
+ // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+ // Index for top typing suggestion should be 0.
+ if (isValidWord && outputWordIndex == 0) {
+ terminalDicNode->outputSpacePositionsResult(spaceIndices);
}
// Don't output invalid words. However, we still need to submit their shortcuts if any.
if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore;
- if (isSafeExactMatch) {
- // Demote exact matches that are not the highest probable node among all exact
- // matches.
- const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
- || terminals[bestExactMatchedNodeTerminalIndex].getProbability()
- < terminalDicNode->getProbability();
- const int outputWordIndexToBeDemoted = isBestTerminal ?
- bestExactMatchedNodeOutputWordIndex : outputWordIndex;
- if (outputWordIndexToBeDemoted >= 0) {
- frequencies[outputWordIndexToBeDemoted] -=
- FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
- }
- if (isBestTerminal) {
- // Updates the best exact matched node index.
- bestExactMatchedNodeTerminalIndex = terminalIndex;
- // Updates the best exact matched output word index.
- bestExactMatchedNodeOutputWordIndex = outputWordIndex;
- }
- }
// Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
++outputWordIndex;
}
- const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
- finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
+ if (!terminalDicNode->hasMultipleWords()) {
+ const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
+ terminalDicNode->getAttributesPos());
+ // Shortcuts are not supported for multi-word suggestions.
+ // TODO: Check shortcuts during traversal for multi-word suggestions.
+ const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
+ outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
+ finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
+
+ }
DicNode::managedDelete(terminalDicNode);
}
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 752bde9ac..875cbe4e0 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -82,8 +82,6 @@ class Suggest : public SuggestInterface {
// Threshold for autocorrection classifier
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
- // Final score penalty to exact match words that are not the most probable exact match.
- static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
const Traversal *const TRAVERSAL;
const Scoring *const SCORING;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
index 5070651cb..70dad67e8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h
@@ -30,10 +30,11 @@ class DictionaryStructurePolicyFactory {
static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
switch (dictionaryFormat) {
- case BinaryDictionaryFormatUtils::VERSION_1:
- // Fall through
case BinaryDictionaryFormatUtils::VERSION_2:
return PatriciaTriePolicy::getInstance();
+ case BinaryDictionaryFormatUtils::VERSION_3:
+ // TODO: support version 3 dictionaries.
+ return 0;
default:
ASSERT(false);
return 0;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 5ae396e64..ef144e00a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -23,6 +23,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/policyimpl/typing/scoring_params.h"
@@ -136,7 +137,7 @@ class TypingTraversal : public Traversal {
return ScoringParams::MAX_SPATIAL_DISTANCE;
}
- AK_FORCE_INLINE bool allowPartialCommit() const {
+ AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
return true;
}
@@ -159,7 +160,7 @@ class TypingTraversal : public Traversal {
const DicNode *const dicNode) const {
const ProximityType proximityType =
getProximityType(traverseSession, parentDicNode, dicNode);
- if (!DicNodeUtils::isProximityChar(proximityType)) {
+ if (!ProximityInfoUtils::isMatchOrProximityChar(proximityType)) {
return false;
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index e098f353e..830aa80de 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -169,12 +169,7 @@ class TypingWeighting : public Weighting {
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
- // We promote exact matches here to prevent them from being pruned. The final score of
- // exact match nodes might be demoted later in Suggest::outputSuggestions if there are
- // multiple exact matches.
- const float languageImprobability = (dicNode->isExactMatch()) ?
- 0.0f : dicNodeLanguageImprobability;
- return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
+ return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
}
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
diff --git a/native/jni/src/utils/log_utils.cpp b/native/jni/src/utils/log_utils.cpp
new file mode 100644
index 000000000..5ab2b2862
--- /dev/null
+++ b/native/jni/src/utils/log_utils.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "log_utils.h"
+
+#include <cstdio>
+#include <stdarg.h>
+
+#include "defines.h"
+
+namespace latinime {
+    // Formats a printf-style message and sends it to android.util.Log.i() under the tag
+    // "LatinIME:LogUtils".
+    //
+    // Logging is best effort: if the Log class or method cannot be found, if formatting
+    // fails, or if a Java string cannot be created, the message is silently dropped. No
+    // Java exception is left pending when this function returns.
+    //
+    // env: the JNI environment used to reach the Java logger.
+    // format: a printf-style format string, followed by the values it refers to.
+    /* static */ void LogUtils::logToJava(JNIEnv *const env, const char *const format, ...) {
+        static const char *const TAG = "LatinIME:LogUtils";
+        const jclass androidUtilLogClass = env->FindClass("android/util/Log");
+        if (!androidUtilLogClass) {
+            // If we can't find the class, we are probably in off-device testing, and
+            // it's expected. Regardless, logging is not essential to functionality, so
+            // we should just return. However, FindClass has thrown an exception behind
+            // our back and there is no way to prevent it from doing that, so we clear
+            // the exception before we return.
+            env->ExceptionClear();
+            return;
+        }
+        const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i",
+                "(Ljava/lang/String;Ljava/lang/String;)I");
+        if (!logDotIMethodId) {
+            // Same reasoning as above: drop the exception and give up on logging.
+            env->ExceptionClear();
+            env->DeleteLocalRef(androidUtilLogClass);
+            return;
+        }
+
+        static const int DEFAULT_LINE_SIZE = 128;
+        char fixedSizeCString[DEFAULT_LINE_SIZE];
+        va_list argList;
+        va_start(argList, format);
+        // vsnprintf returns the length of the complete message, not counting the 0
+        // terminator, even when it had to truncate; it returns a negative value on error.
+        const int length = vsnprintf(fixedSizeCString, DEFAULT_LINE_SIZE, format, argList);
+        va_end(argList);
+
+        jstring javaString = 0;
+        if (length >= 0 && length < DEFAULT_LINE_SIZE) {
+            // The buffer was large enough.
+            javaString = env->NewStringUTF(fixedSizeCString);
+        } else if (length >= DEFAULT_LINE_SIZE) {
+            // The buffer was not large enough. Format again into a heap buffer of the
+            // exact size; a stack array of arbitrary size could overflow the stack.
+            char *const variableSizeCString = new char[length + 1];
+            va_start(argList, format);
+            vsnprintf(variableSizeCString, length + 1, format, argList);
+            va_end(argList);
+            javaString = env->NewStringUTF(variableSizeCString);
+            delete[] variableSizeCString;
+        }
+        // javaString is null if formatting failed or if NewStringUTF threw. In the latter
+        // case an exception is pending, so no further JNI call may be made before clearing.
+        const jstring javaTag = javaString ? env->NewStringUTF(TAG) : 0;
+        if (javaTag) {
+            env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString);
+        }
+        // Never leak an exception (out of memory, or one thrown by Log.i) to the caller.
+        if (env->ExceptionCheck()) env->ExceptionClear();
+        if (javaString) env->DeleteLocalRef(javaString);
+        if (javaTag) env->DeleteLocalRef(javaTag);
+        env->DeleteLocalRef(androidUtilLogClass);
+    }
+}
diff --git a/native/jni/src/utils/log_utils.h b/native/jni/src/utils/log_utils.h
new file mode 100644
index 000000000..6ac16d91a
--- /dev/null
+++ b/native/jni/src/utils/log_utils.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LOG_UTILS_H
+#define LATINIME_LOG_UTILS_H
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+
+// Logging helper for native code. Its only public member is the static logToJava().
+class LogUtils {
+ public:
+ // Logs a printf-style message using the given JNI environment: `format` is the
+ // format string and the variadic arguments are the values it refers to.
+ static void logToJava(JNIEnv *const env, const char *const format, ...)
+#ifdef __GNUC__
+ // Lets GCC-compatible compilers check the arguments against the format string:
+ // parameter 2 is the format, and the values to format start at parameter 3.
+ __attribute__ ((format (printf, 2, 3)))
+#endif // __GNUC__
+ ;
+
+ private:
+ // NOTE(review): presumably defined in defines.h and disables copying -- confirm.
+ DISALLOW_COPY_AND_ASSIGN(LogUtils);
+};
+} // namespace latinime
+#endif // LATINIME_LOG_UTILS_H