aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h76
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h7
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h58
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp85
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h26
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header.h14
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp4
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_dictionary_info.h37
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/terminal_attributes.h9
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_utils.h4
-rw-r--r--native/jni/src/suggest/core/policy/traversal.h2
-rw-r--r--native/jni/src/suggest/core/suggest.cpp46
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h5
-rw-r--r--native/jni/src/utils/log_utils.cpp72
-rw-r--r--native/jni/src/utils/log_utils.h37
17 files changed, 339 insertions, 153 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index cb6681456..607a74400 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -35,46 +35,74 @@
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
+#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
-#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
-#include <android/log.h>
-#ifndef LOG_TAG
-#define LOG_TAG "LatinIME: "
-#endif // LOG_TAG
-#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
-#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
-
-#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
-#define INTS_TO_CHARS(input, length, output) do { \
- intArrayToCharArray(input, length, output); } while (0)
-
-// TODO: Support full UTF-8 conversion
-AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize,
- char *dest) {
+AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
+ char *dest, const int destSize) {
+ // We want to always terminate with a 0 char, so stop one short of the length to make
+ // sure there is room.
+ const int destLimit = destSize - 1;
int si = 0;
int di = 0;
- while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) {
+ while (si < sourceSize && di < destLimit && 0 != source[si]) {
const int codePoint = source[si++];
- if (codePoint < 0x7F) {
+ if (codePoint < 0x7F) { // One byte
dest[di++] = codePoint;
- } else if (codePoint < 0x7FF) {
+ } else if (codePoint < 0x7FF) { // Two bytes
+ if (di + 1 >= destLimit) break;
dest[di++] = 0xC0 + (codePoint >> 6);
dest[di++] = 0x80 + (codePoint & 0x3F);
- } else if (codePoint < 0xFFFF) {
+ } else if (codePoint < 0xFFFF) { // Three bytes
+ if (di + 2 >= destLimit) break;
dest[di++] = 0xE0 + (codePoint >> 12);
- dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0x1FFFFF) { // Four bytes
+ if (di + 3 >= destLimit) break;
+ dest[di++] = 0xF0 + (codePoint >> 18);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0x3FFFFFF) { // Five bytes
+ if (di + 4 >= destLimit) break;
+ dest[di++] = 0xF8 + (codePoint >> 24);
+ dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = codePoint & 0x3F;
+ } else if (codePoint <= 0x7FFFFFFF) { // Six bytes
+ if (di + 5 >= destLimit) break;
+ dest[di++] = 0xFC + (codePoint >> 30);
+ dest[di++] = 0x80 + ((codePoint >> 24) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F);
+ dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F);
+ dest[di++] = codePoint & 0x3F;
+ } else {
+ // Not a code point... skip.
}
}
dest[di] = 0;
return di;
}
+#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#include <android/log.h>
+#ifndef LOG_TAG
+#define LOG_TAG "LatinIME: "
+#endif // LOG_TAG
+#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
+#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
+
+#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
+#define INTS_TO_CHARS(input, length, output, outlength) do { \
+ intArrayToCharArray(input, length, output, outlength); } while (0)
+
static inline void dumpWordInfo(const int *word, const int length, const int rank,
const int probability) {
static char charBuf[50];
- const int N = intArrayToCharArray(word, length, charBuf);
+ const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) {
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability);
}
@@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) {
static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
static char charBuf[50];
- const int N = intArrayToCharArray(word, length, charBuf);
+ const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf));
if (N > 1) {
AKLOGI("[ %s ]", charBuf);
}
@@ -304,8 +332,6 @@ static inline void prof_out(void) {
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
-#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
-
// DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 52db8e9c7..017df34fd 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -28,15 +28,16 @@
#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
do { char charBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
#define DUMP_WORD_AND_SCORE(header) \
do { char charBuf[50]; char prevWordCharBuf[50]; \
- INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \
+ INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
+ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
+ NELEMS(prevWordCharBuf)); \
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h
new file mode 100644
index 000000000..1a39f2ef3
--- /dev/null
+++ b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DIC_NODE_PROXIMITY_FILTER_H
+#define LATINIME_DIC_NODE_PROXIMITY_FILTER_H
+
+#include "defines.h"
+#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/layout/proximity_info_utils.h"
+#include "suggest/core/policy/dictionary_structure_policy.h"
+
+namespace latinime {
+
+class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter {
+ public:
+ DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
+ const int pointIndex, const bool exactOnly)
+ : mProximityInfoState(pInfoState), mPointIndex(pointIndex), mExactOnly(exactOnly) {}
+ // Returns true when codePoint is rejected, i.e. isProximityCodePoint() says it does not match.
+ bool isFilteredOut(const int codePoint) const {
+ return !isProximityCodePoint(codePoint);
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeProximityFilter);
+
+ const ProximityInfoState *const mProximityInfoState; // may be null, see isProximityCodePoint()
+ const int mPointIndex; // index handed to the ProximityInfoState queries below
+ const bool mExactOnly; // when true, only the primary code point at mPointIndex is accepted
+ // No state: accept everything. mExactOnly: primary code point only. Else: match or proximity.
+ // TODO: Move to proximity info state
+ bool isProximityCodePoint(const int codePoint) const {
+ if (!mProximityInfoState) { // nothing to compare against, so nothing is filtered
+ return true;
+ }
+ if (mExactOnly) {
+ return mProximityInfoState->getPrimaryCodePointAt(mPointIndex) == codePoint;
+ }
+ const ProximityType matchedId = mProximityInfoState->getProximityType(
+ mPointIndex, codePoint, true /* checkProximityChars */);
+ return ProximityInfoUtils::isMatchOrProximityChar(matchedId);
+ }
+};
+} // namespace latinime
+#endif // LATINIME_DIC_NODE_PROXIMITY_FILTER_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 9bf7eceb5..c7c8d2a19 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -14,18 +14,17 @@
* limitations under the License.
*/
+#include "suggest/core/dicnode/dic_node_utils.h"
+
#include <cstring>
-#include <vector>
#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dicnode/dic_node_proximity_filter.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h"
-#include "suggest/core/layout/proximity_info.h"
-#include "suggest/core/layout/proximity_info_state.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "utils/char_utils.h"
@@ -57,21 +56,20 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
+ const DicNodeProximityFilter *const childrenFilter,
DicNodeVector *childDicNodes) {
// Passing multiple chars node. No need to traverse child
const int codePoint = dicNode->getNodeTypedCodePoint();
const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint);
- const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
- if (isMatch || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
+ if (!childrenFilter->isFilteredOut(codePoint)
+ || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
childDicNodes->pushPassingChild(dicNode);
}
}
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
- const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
+ const DicNodeProximityFilter *const childrenFilter,
DicNodeVector *childDicNodes) {
int nextPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
@@ -110,10 +108,7 @@ namespace latinime {
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
binaryDictionaryInfo->getDictRoot(), flags, pos);
- if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) {
- return siblingPos;
- }
- if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) {
+ if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
return siblingPos;
}
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
@@ -121,39 +116,9 @@ namespace latinime {
return siblingPos;
}
-/* static */ bool DicNodeUtils::isDicNodeFilteredOut(const int nodeCodePoint,
- const ProximityInfo *const pInfo, const std::vector<int> *const codePointsFilter) {
- const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
- if (filterSize <= 0) {
- return false;
- }
- if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
- || CharUtils::isIntentionalOmissionCodePoint(nodeCodePoint))) {
- // If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
- // filtered.
- return false;
- }
- const int lowerCodePoint = CharUtils::toLowerCase(nodeCodePoint);
- const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint);
- // TODO: Avoid linear search
- for (int i = 0; i < filterSize; ++i) {
- // Checking if a normalized code point is in filter characters when pInfo is not
- // null. When pInfo is null, nodeCodePoint is used to check filtering without
- // normalizing.
- if ((pInfo && ((*codePointsFilter)[i] == lowerCodePoint
- || (*codePointsFilter)[i] == baseLowerCodePoint))
- || (!pInfo && (*codePointsFilter)[i] == nodeCodePoint)) {
- return false;
- }
- }
- return true;
-}
-
/* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
- const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
- DicNodeVector *childDicNodes) {
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) {
if (!dicNode->hasChildren()) {
return;
}
@@ -161,14 +126,8 @@ namespace latinime {
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) {
- const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
- pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
- childDicNodes);
- if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
- // All code points have been found.
- break;
- }
+ childrenFilter, childDicNodes);
}
}
@@ -184,13 +143,12 @@ namespace latinime {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
+ const DicNodeProximityFilter childrenFilter(pInfoState, pointIndex, exactOnly);
if (!dicNode->isLeavingNode()) {
- DicNodeUtils::createAndGetPassingChildNode(dicNode, pInfoState, pointIndex, exactOnly,
- childDicNodes);
+ DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
} else {
DicNodeUtils::createAndGetAllLeavingChildNodes(
- dicNode, binaryDictionaryInfo, pInfoState, pointIndex, exactOnly,
- 0 /* codePointsFilter */, 0 /* pInfo */, childDicNodes);
+ dicNode, binaryDictionaryInfo, &childrenFilter, childDicNodes);
}
}
@@ -230,23 +188,6 @@ namespace latinime {
return ProbabilityUtils::backoff(unigramProbability);
}
-///////////////////////////////////////
-// Bigram / Unigram dictionary utils //
-///////////////////////////////////////
-
-/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
- const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
- if (!pInfoState) {
- return true;
- }
- if (exactOnly) {
- return pInfoState->getPrimaryCodePointAt(pointIndex) == nodeCodePoint;
- }
- const ProximityType matchedId = pInfoState->getProximityType(pointIndex, nodeCodePoint,
- true /* checkProximityChars */);
- return isProximityChar(matchedId);
-}
-
////////////////
// Char utils //
////////////////
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index d526975ce..7b567b582 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -18,7 +18,6 @@
#define LATINIME_DIC_NODE_UTILS_H
#include <stdint.h>
-#include <vector>
#include "defines.h"
@@ -26,8 +25,8 @@ namespace latinime {
class BinaryDictionaryInfo;
class DicNode;
+class DicNodeProximityFilter;
class DicNodeVector;
-class ProximityInfo;
class ProximityInfoState;
class MultiBigramMap;
@@ -44,19 +43,12 @@ class DicNodeUtils {
const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const DicNode *const node, MultiBigramMap *const multiBigramMap);
- static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
- const std::vector<int> *const codePointsFilter);
// TODO: Move to private
static void getProximityChildDicNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
DicNodeVector *childDicNodes);
- // TODO: Move to proximity info
- static bool isProximityChar(ProximityType type) {
- return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
- }
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
// Max number of bigrams to look up
@@ -64,22 +56,14 @@ class DicNodeUtils {
static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const DicNode *const node, MultiBigramMap *multiBigramMap);
- static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState,
- const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes);
+ static void createAndGetPassingChildNode(DicNode *dicNode,
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
static void createAndGetAllLeavingChildNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
- const std::vector<int> *const codePointsFilter,
- const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
- const ProximityInfoState *pInfoState, const int pointIndex,
- const bool exactOnly, const std::vector<int> *const codePointsFilter,
- const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
-
- // TODO: Move to proximity info
- static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex,
- const bool exactOnly, const int nodeCodePoint);
+ const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
index 6dba0b266..240512bce 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h
@@ -53,6 +53,20 @@ class BinaryDictionaryHeader {
return mMultiWordCostMultiplier;
}
+ AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key,
+ int *outValue, int outValueSize) const { // Reads key's value, or "?" if the read fails
+ if (outValueSize <= 0) return; // no room to write anything, not even a terminator
+ if (outValueSize == 1) { // room for the terminator only
+ outValue[0] = '\0';
+ return;
+ }
+ if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
+ key, outValue, outValueSize)) {
+ outValue[0] = '?'; // readHeaderValue() returned false: report a 0-terminated "?"
+ outValue[1] = '\0';
+ }
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader);
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
index 1e437dd63..a57b0f859 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp
@@ -86,8 +86,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
// The key was found.
- ByteArrayUtils::readStringAndAdvancePosition(
+ const int length = ByteArrayUtils::readStringAndAdvancePosition(
binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
+ // Add a 0 terminator to the string.
+ outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
return true;
}
ByteArrayUtils::advancePositionToBehindString(
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
index 7cb31440a..cbea18f90 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h
@@ -20,23 +20,27 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
+#include "utils/log_utils.h"
namespace latinime {
class BinaryDictionaryInfo {
public:
- BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
- const int dictBufOffset, const bool isUpdatable)
+ AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf,
+ const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable)
: mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd),
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)),
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
- mDictionaryFormat)) {}
+ mDictionaryFormat)) {
+ logDictionaryInfo(env);
+ }
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
@@ -88,6 +92,33 @@ class BinaryDictionaryInfo {
const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
const DictionaryStructurePolicy *const mStructurePolicy;
+
+ AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { // called by the constructor
+ const int BUFFER_SIZE = 16; // element count of every buffer below
+ int dictionaryIdCodePointBuffer[BUFFER_SIZE];
+ int versionStringCodePointBuffer[BUFFER_SIZE];
+ int dateStringCodePointBuffer[BUFFER_SIZE];
+ mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary",
+ dictionaryIdCodePointBuffer, BUFFER_SIZE);
+ mDictionaryHeader.readHeaderValueOrQuestionMark("version",
+ versionStringCodePointBuffer, BUFFER_SIZE);
+ mDictionaryHeader.readHeaderValueOrQuestionMark("date",
+ dateStringCodePointBuffer, BUFFER_SIZE);
+ // Convert each code point array to a 0-terminated char string for the %s arguments below.
+ char dictionaryIdCharBuffer[BUFFER_SIZE];
+ char versionStringCharBuffer[BUFFER_SIZE];
+ char dateStringCharBuffer[BUFFER_SIZE];
+ intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
+ dictionaryIdCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
+ versionStringCharBuffer, BUFFER_SIZE);
+ intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
+ dateStringCharBuffer, BUFFER_SIZE);
+
+ LogUtils::logToJava(env,
+ "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i",
+ dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize);
+ }
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 675b54972..f520a75b1 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -22,6 +22,7 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -32,8 +33,9 @@
namespace latinime {
-Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable)
- : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd,
+Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd,
+ int dictBufOffset, bool isUpdatable)
+ : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd,
dictBufOffset, isUpdatable),
mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 94579c200..1bf24a85b 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -20,6 +20,7 @@
#include <stdint.h>
#include "defines.h"
+#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
namespace latinime {
@@ -52,7 +53,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable);
+ Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset,
+ bool isUpdatable);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
index cec47081e..a8520b1f1 100644
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
@@ -76,12 +76,13 @@ class TerminalAttributes {
mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
inline ShortcutIterator getShortcutIterator() const {
- // The size of the shortcuts is stored here so that the whole shortcut chunk can be
- // skipped quickly, so we ignore it.
int shortcutPos = mShortcutListSizePos;
- BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
- mBinaryDictionaryInfo, &shortcutPos);
const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
+ if (hasShortcutList) {
+ BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
+ mBinaryDictionaryInfo, &shortcutPos);
+ }
+ // shortcutPos is never used if hasShortcutList is false.
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
}
diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h
index 54f7539d1..0e28560fc 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h
@@ -117,6 +117,10 @@ class ProximityInfoUtils {
return getSquaredDistanceFloat(x, y, projectionX, projectionY);
}
+ static AK_FORCE_INLINE bool isMatchOrProximityChar(const ProximityType type) { // any of the 3
+ return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
+ }
+
// Normal distribution N(u, sigma^2).
struct NormalDistribution {
public:
diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h
index c6f66f231..f26d7149e 100644
--- a/native/jni/src/suggest/core/policy/traversal.h
+++ b/native/jni/src/suggest/core/policy/traversal.h
@@ -45,7 +45,7 @@ class Traversal {
const DicNode *const dicNode) const = 0;
virtual bool needsToTraverseAllUserInput() const = 0;
virtual float getMaxSpatialDistance() const = 0;
- virtual bool allowPartialCommit() const = 0;
+ virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0;
virtual int getDefaultExpandDicNodeSize() const = 0;
virtual int getMaxCacheSize() const = 0;
virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 173a612be..9a0f10cd5 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -85,9 +85,9 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
if (!traverseSession->getProximityInfoState(0)->isUsed()) {
return;
}
- if (TRAVERSAL->allowPartialCommit()) {
- commitPoint = 0;
- }
+
+ // Never auto partial commit for now.
+ commitPoint = 0;
if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
&& traverseSession->isContinuousSuggestionPossible()) {
@@ -151,6 +151,17 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
int maxScore = S_INT_MIN;
int bestExactMatchedNodeTerminalIndex = -1;
int bestExactMatchedNodeOutputWordIndex = -1;
+ // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
+ // a long multiple words suggestion.
+ // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+ // traverseSession->isPartiallyCommited() always returns false because we never auto partial
+ // commit for now.
+ const bool forceCommitMultiWords = (terminalSize > 0) ?
+ TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
+ && (traverseSession->isPartiallyCommited()
+ || (traverseSession->getInputSize()
+ >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
+ && terminals[0].hasMultipleWords())) : false;
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
++terminalIndex) {
@@ -180,22 +191,16 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
// Increase output score of top typing suggestion to ensure autocorrection.
// TODO: Better integration with java side autocorrection logic.
- // Force autocorrection for obvious long multi-word suggestions.
- const bool isForceCommitMultiWords = TRAVERSAL->allowPartialCommit()
- && (traverseSession->isPartiallyCommited()
- || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
- && terminalDicNode->hasMultipleWords()));
-
const int finalScore = SCORING->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
- isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord()));
+ (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
+ || (isValidWord && SCORING->doesAutoCorrectValidWord()));
maxScore = max(maxScore, finalScore);
- if (TRAVERSAL->allowPartialCommit()) {
- // Index for top typing suggestion should be 0.
- if (isValidWord && outputWordIndex == 0) {
- terminalDicNode->outputSpacePositionsResult(spaceIndices);
- }
+ // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
+ // Index for top typing suggestion should be 0.
+ if (isValidWord && outputWordIndex == 0) {
+ terminalDicNode->outputSpacePositionsResult(spaceIndices);
}
// Don't output invalid words. However, we still need to submit their shortcuts if any.
@@ -227,9 +232,14 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
++outputWordIndex;
}
- const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
- finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
+ if (!terminalDicNode->hasMultipleWords()) {
+ // Shortcuts are not supported for multi-word suggestions.
+ // TODO: Check shortcuts during traversal for multi-word suggestions.
+ const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
+ outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex,
+ finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
+
+ }
DicNode::managedDelete(terminalDicNode);
}
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 5ae396e64..ef144e00a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -23,6 +23,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/layout/proximity_info_state.h"
+#include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/policyimpl/typing/scoring_params.h"
@@ -136,7 +137,7 @@ class TypingTraversal : public Traversal {
return ScoringParams::MAX_SPATIAL_DISTANCE;
}
- AK_FORCE_INLINE bool allowPartialCommit() const {
+ AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const {
return true;
}
@@ -159,7 +160,7 @@ class TypingTraversal : public Traversal {
const DicNode *const dicNode) const {
const ProximityType proximityType =
getProximityType(traverseSession, parentDicNode, dicNode);
- if (!DicNodeUtils::isProximityChar(proximityType)) {
+ if (!ProximityInfoUtils::isMatchOrProximityChar(proximityType)) {
return false;
}
return true;
diff --git a/native/jni/src/utils/log_utils.cpp b/native/jni/src/utils/log_utils.cpp
new file mode 100644
index 000000000..5ab2b2862
--- /dev/null
+++ b/native/jni/src/utils/log_utils.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "log_utils.h"
+
+#include <cstdio>
+#include <stdarg.h>
+
+#include "defines.h"
+
+namespace latinime {
+ /* static */ void LogUtils::logToJava(JNIEnv *const env, const char *const format, ...) {
+ static const char *TAG = "LatinIME:LogUtils";
+ const jclass androidUtilLogClass = env->FindClass("android/util/Log");
+ if (!androidUtilLogClass) {
+ // If we can't find the class, we are probably in off-device testing, and
+ // it's expected. Regardless, logging is not essential to functionality, so
+ // we should just return. However, FindClass has thrown an exception behind
+ // our back and there is no way to prevent it from doing that, so we clear
+ // the exception before we return.
+ env->ExceptionClear();
+ return;
+ }
+ const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i",
+ "(Ljava/lang/String;Ljava/lang/String;)I");
+ if (!logDotIMethodId) {
+ env->ExceptionClear();
+ if (androidUtilLogClass) env->DeleteLocalRef(androidUtilLogClass);
+ return;
+ }
+ const jstring javaTag = env->NewStringUTF(TAG);
+
+ static const int DEFAULT_LINE_SIZE = 128;
+ char fixedSizeCString[DEFAULT_LINE_SIZE];
+ va_list argList;
+ va_start(argList, format);
+ // Get the necessary size. Add 1 for the 0 terminator.
+ const int size = vsnprintf(fixedSizeCString, DEFAULT_LINE_SIZE, format, argList) + 1;
+ va_end(argList);
+
+ jstring javaString;
+ if (size <= DEFAULT_LINE_SIZE) {
+ // The buffer was large enough.
+ javaString = env->NewStringUTF(fixedSizeCString);
+ } else {
+ // The buffer was not large enough.
+ va_start(argList, format);
+ char variableSizeCString[size];
+ vsnprintf(variableSizeCString, size, format, argList);
+ va_end(argList);
+ javaString = env->NewStringUTF(variableSizeCString);
+ }
+
+ env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString);
+ if (javaString) env->DeleteLocalRef(javaString);
+ if (javaTag) env->DeleteLocalRef(javaTag);
+ if (androidUtilLogClass) env->DeleteLocalRef(androidUtilLogClass);
+ }
+}
diff --git a/native/jni/src/utils/log_utils.h b/native/jni/src/utils/log_utils.h
new file mode 100644
index 000000000..6ac16d91a
--- /dev/null
+++ b/native/jni/src/utils/log_utils.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LOG_UTILS_H
+#define LATINIME_LOG_UTILS_H
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+
+class LogUtils {
+ public:
+ static void logToJava(JNIEnv *const env, const char *const format, ...)
+#ifdef __GNUC__
+ __attribute__ ((format (printf, 2, 3)))
+#endif // __GNUC__
+ ;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LogUtils);
+};
+} // namespace latinime
+#endif // LATINIME_LOG_UTILS_H