diff options
Diffstat (limited to 'native')
27 files changed, 478 insertions, 328 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index f89eea735..d78da969b 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -79,8 +79,9 @@ LATIN_IME_CORE_SRC_FILES := \ typing_traversal.cpp \ typing_weighting.cpp) \ $(addprefix utils/, \ + autocorrection_threshold_utils.cpp \ char_utils.cpp \ - autocorrection_threshold_utils.cpp) + log_utils.cpp) LOCAL_SRC_FILES := \ $(LATIN_IME_JNI_SRC_FILES) \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index a93bbeb8c..6e1b80ee0 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -93,8 +93,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s AKLOGE("DICT: dictionary format is unknown, bad magic number"); releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd); } else { - dictionary = new Dictionary( - dictBuf, static_cast<int>(dictSize), fd, offset, updatableMmap); + dictionary = new Dictionary(env, dictBuf, static_cast<int>(dictSize), fd, offset, + updatableMmap); } PROF_END(66); PROF_CLOSE; @@ -201,7 +201,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict, jintArray word) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) return 0; + if (!dictionary) return NOT_A_PROBABILITY; const jsize wordLength = env->GetArrayLength(word); int codePoints[wordLength]; env->GetIntArrayRegion(word, 0, wordLength, codePoints); diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index cb6681456..607a74400 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -35,46 +35,74 @@ // Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java #define MAX_PROXIMITY_CHARS_SIZE 16 #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) -#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) -#include <android/log.h> -#ifndef LOG_TAG -#define LOG_TAG "LatinIME: " -#endif // LOG_TAG -#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) -#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) - -#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) -#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) -#define INTS_TO_CHARS(input, length, output) do { \ - intArrayToCharArray(input, length, output); } while (0) - -// TODO: Support full UTF-8 conversion -AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize, - char *dest) { +AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize, + char *dest, const int destSize) { + // We want to always terminate with a 0 char, so stop one short of the length to make + // sure there is room. + const int destLimit = destSize - 1; int si = 0; int di = 0; - while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) { + while (si < sourceSize && di < destLimit && 0 != source[si]) { const int codePoint = source[si++]; - if (codePoint < 0x7F) { + if (codePoint < 0x7F) { // One byte dest[di++] = codePoint; - } else if (codePoint < 0x7FF) { + } else if (codePoint < 0x7FF) { // Two bytes + if (di + 1 >= destLimit) break; dest[di++] = 0xC0 + (codePoint >> 6); dest[di++] = 0x80 + (codePoint & 0x3F); - } else if (codePoint < 0xFFFF) { + } else if (codePoint < 0xFFFF) { // Three bytes + if (di + 2 >= destLimit) break; dest[di++] = 0xE0 + (codePoint >> 12); - dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6); + dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F); + dest[di++] = 0x80 + (codePoint & 0x3F); + } else if (codePoint <= 0x1FFFFF) { // Four bytes + if (di + 3 >= destLimit) break; + dest[di++] = 0xF0 + (codePoint >> 18); + dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F); + dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F); dest[di++] = 0x80 + (codePoint & 0x3F); + } else if (codePoint <= 0x3FFFFFF) { // Five bytes + if (di + 4 >= destLimit) break; + dest[di++] = 0xF8 + (codePoint >> 24); + dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F); + dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F); + dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F); + dest[di++] = codePoint & 0x3F; + } else if (codePoint <= 0x7FFFFFFF) { // Six bytes + if (di + 5 >= destLimit) break; + dest[di++] = 0xFC + (codePoint >> 30); + dest[di++] = 0x80 + ((codePoint >> 24) & 0x3F); + dest[di++] = 0x80 + ((codePoint >> 18) & 0x3F); + dest[di++] = 0x80 + ((codePoint >> 12) & 0x3F); + dest[di++] = 0x80 + ((codePoint >> 6) & 0x3F); + dest[di++] = codePoint & 0x3F; + } else { + // Not a code point... skip. } } dest[di] = 0; return di; } +#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#include <android/log.h> +#ifndef LOG_TAG +#define LOG_TAG "LatinIME: " +#endif // LOG_TAG +#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) +#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) + +#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) +#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) +#define INTS_TO_CHARS(input, length, output, outlength) do { \ + intArrayToCharArray(input, length, output, outlength); } while (0) + static inline void dumpWordInfo(const int *word, const int length, const int rank, const int probability) { static char charBuf[50]; - const int N = intArrayToCharArray(word, length, charBuf); + const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf)); if (N > 1) { AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability); } @@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) { static AK_FORCE_INLINE void dumpWord(const int *word, const int length) { static char charBuf[50]; - const int N = intArrayToCharArray(word, length, charBuf); + const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf)); if (N > 1) { AKLOGI("[ %s ]", charBuf); } @@ -304,8 +332,6 @@ static inline void prof_out(void) { template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; } template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; } -#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) - // DEBUG #define INPUTLENGTH_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 52db8e9c7..973da67e4 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -28,15 +28,16 @@ #if DEBUG_DICT #define LOGI_SHOW_ADD_COST_PROP \ do { char charBuf[50]; \ - INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \ + INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \ __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \ getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0) #define DUMP_WORD_AND_SCORE(header) \ do { char charBuf[50]; char prevWordCharBuf[50]; \ - INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf); \ + INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \ - mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \ + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \ + NELEMS(prevWordCharBuf)); \ AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \ getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ @@ -111,32 +112,23 @@ class DicNode { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */, + NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, - false /* isTerminal */, true /* hasChildren */, 0 /* depth */, - 0 /* terminalDepth */); + false /* isTerminal */, true /* hasChildren */, + false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); mDicNodeState.init(prevWordNodePos); PROF_NODE_RESET(mProfiler); } - void initAsPassingChild(DicNode *parentNode) { - mIsUsed = true; - mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; - const int c = parentNode->getNodeTypedCodePoint(); - mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); - mDicNodeState.init(&parentNode->mDicNodeState); - PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); - } - // Init for root with previous word void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_DICT_POS, 0 /* flags */, rootGroupPos, NOT_A_DICT_POS /* attributesPos */, + NOT_A_DICT_POS, rootGroupPos, NOT_A_DICT_POS /* attributesPos */, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, - false /* isTerminal */, true /* hasChildren */, 0 /* depth */, - 0 /* terminalDepth */); + false /* isTerminal */, true /* hasChildren */, + false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); // TODO: Move to dicNodeState? mDicNodeState.mDicNodeStateOutput.init(); // reset for next word mDicNodeState.mDicNodeStateInput.init( @@ -156,18 +148,27 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // TODO: minimize arguments by looking binary_format - void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos, + void initAsPassingChild(DicNode *parentNode) { + mIsUsed = true; + mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; + const int c = parentNode->getNodeTypedCodePoint(); + mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); + mDicNodeState.init(&parentNode->mDicNodeState); + PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); + } + + void initAsChild(DicNode *dicNode, const int pos, const int childrenPos, const int attributesPos, const int probability, const bool isTerminal, - const bool hasChildren, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0], - probability, isTerminal, hasChildren, newDepth, newLeavingDepth); + mDicNodeProperties.init(pos, childrenPos, attributesPos, mergedNodeCodePoints[0], + probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, + newLeavingDepth); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -215,7 +216,7 @@ class DicNode { } bool isImpossibleBigramWord() const { - if (mDicNodeProperties.hasBlacklistedOrNotAWordFlag()) { + if (isBlacklistedOrNotAWord()) { return true; } const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() @@ -462,8 +463,8 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.isExactMatch(); } - uint8_t getFlags() const { - return mDicNodeProperties.getFlags(); + bool isBlacklistedOrNotAWord() const { + return mDicNodeProperties.isBlacklistedOrNotAWord(); } int getAttributesPos() const { @@ -503,6 +504,12 @@ class DicNode { if (!right->isUsed()) { return false; } + // Promote exact matches to prevent them from being pruned. + const bool leftExactMatch = isExactMatch(); + const bool rightExactMatch = right->isExactMatch(); + if (leftExactMatch != rightExactMatch) { + return leftExactMatch; + } const float diff = right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance(); static const float MIN_DIFF = 0.000001f; diff --git a/native/jni/src/suggest/core/dicnode/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/dic_node_properties.h index 7e8aa4979..d98000d83 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_properties.h @@ -20,7 +20,6 @@ #include <stdint.h> #include "defines.h" -#include "suggest/core/dictionary/binary_format.h" namespace latinime { @@ -32,24 +31,25 @@ namespace latinime { class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0), - mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false), - mHasChildren(false) {} + : mPos(0), mChildrenPos(0), mAttributesPos(0), mProbability(0), + mNodeCodePoint(0), mIsTerminal(false), mHasChildren(false), + mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} virtual ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. - void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos, + void init(const int pos, const int childrenPos, const int attributesPos, const int nodeCodePoint, const int probability, const bool isTerminal, - const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) { + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t depth, const uint16_t leavingDepth) { mPos = pos; - mFlags = flags; mChildrenPos = childrenPos; mAttributesPos = attributesPos; mNodeCodePoint = nodeCodePoint; mProbability = probability; mIsTerminal = isTerminal; mHasChildren = hasChildren; + mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; } @@ -57,13 +57,13 @@ class DicNodeProperties { // Init for copy void init(const DicNodeProperties *const nodeProp) { mPos = nodeProp->mPos; - mFlags = nodeProp->mFlags; mChildrenPos = nodeProp->mChildrenPos; mAttributesPos = nodeProp->mAttributesPos; mNodeCodePoint = nodeProp->mNodeCodePoint; mProbability = nodeProp->mProbability; mIsTerminal = nodeProp->mIsTerminal; mHasChildren = nodeProp->mHasChildren; + mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; mDepth = nodeProp->mDepth; mLeavingDepth = nodeProp->mLeavingDepth; } @@ -71,13 +71,13 @@ class DicNodeProperties { // Init as passing child void init(const DicNodeProperties *const nodeProp, const int codePoint) { mPos = nodeProp->mPos; - mFlags = nodeProp->mFlags; mChildrenPos = nodeProp->mChildrenPos; mAttributesPos = nodeProp->mAttributesPos; mNodeCodePoint = codePoint; // Overwrite the node char of a passing child mProbability = nodeProp->mProbability; mIsTerminal = nodeProp->mIsTerminal; mHasChildren = nodeProp->mHasChildren; + mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = nodeProp->mLeavingDepth; } @@ -86,10 +86,6 @@ class DicNodeProperties { return mPos; } - uint8_t getFlags() const { - return mFlags; - } - int getChildrenPos() const { return mChildrenPos; } @@ -123,8 +119,8 @@ class DicNodeProperties { return mHasChildren || mDepth != mLeavingDepth; } - bool hasBlacklistedOrNotAWordFlag() const { - return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags); + bool isBlacklistedOrNotAWord() const { + return mIsBlacklistedOrNotAWord; } private: @@ -132,15 +128,15 @@ class DicNodeProperties { // Use a default copy constructor and an assign operator because shallow copies are ok // for this class int mPos; - uint8_t mFlags; int mChildrenPos; int mAttributesPos; int mProbability; int mNodeCodePoint; - uint16_t mDepth; - uint16_t mLeavingDepth; bool mIsTerminal; bool mHasChildren; + bool mIsBlacklistedOrNotAWord; + uint16_t mDepth; + uint16_t mLeavingDepth; }; } // namespace latinime #endif // LATINIME_DIC_NODE_PROPERTIES_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h new file mode 100644 index 000000000..1a39f2ef3 --- /dev/null +++ b/native/jni/src/suggest/core/dicnode/dic_node_proximity_filter.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DIC_NODE_PROXIMITY_FILTER_H +#define LATINIME_DIC_NODE_PROXIMITY_FILTER_H + +#include "defines.h" +#include "suggest/core/layout/proximity_info_state.h" +#include "suggest/core/layout/proximity_info_utils.h" +#include "suggest/core/policy/dictionary_structure_policy.h" + +namespace latinime { + +class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter { + public: + DicNodeProximityFilter(const ProximityInfoState *const pInfoState, + const int pointIndex, const bool exactOnly) + : mProximityInfoState(pInfoState), mPointIndex(pointIndex), mExactOnly(exactOnly) {} + + bool isFilteredOut(const int codePoint) const { + return !isProximityCodePoint(codePoint); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeProximityFilter); + + const ProximityInfoState *const mProximityInfoState; + const int mPointIndex; + const bool mExactOnly; + + // TODO: Move to proximity info state + bool isProximityCodePoint(const int codePoint) const { + if (!mProximityInfoState) { + return true; + } + if (mExactOnly) { + return mProximityInfoState->getPrimaryCodePointAt(mPointIndex) == codePoint; + } + const ProximityType matchedId = mProximityInfoState->getProximityType( + mPointIndex, codePoint, true /* checkProximityChars */); + return ProximityInfoUtils::isMatchOrProximityChar(matchedId); + } +}; +} // namespace latinime +#endif // LATINIME_DIC_NODE_PROXIMITY_FILTER_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 9bf7eceb5..6c7f6667a 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -14,18 +14,17 @@ * limitations under the License. */ +#include "suggest/core/dicnode/dic_node_utils.h" + #include <cstring> -#include <vector> #include "suggest/core/dicnode/dic_node.h" -#include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/dicnode/dic_node_proximity_filter.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/probability_utils.h" -#include "suggest/core/layout/proximity_info.h" -#include "suggest/core/layout/proximity_info_state.h" #include "suggest/core/policy/dictionary_structure_policy.h" #include "utils/char_utils.h" @@ -57,21 +56,20 @@ namespace latinime { /////////////////////////////////// /* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode, - const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, + const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) { // Passing multiple chars node. No need to traverse child const int codePoint = dicNode->getNodeTypedCodePoint(); const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint); - const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint); - if (isMatch || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) { + if (!childrenFilter->isFilteredOut(codePoint) + || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) { childDicNodes->pushPassingChild(dicNode); } } /* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos, const BinaryDictionaryInfo *const binaryDictionaryInfo, - const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, - const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo, + const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) { int nextPos = pos; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer( @@ -80,6 +78,7 @@ namespace latinime { const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags)); const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags); const bool hasShortcuts = (0 != (BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS & flags)); + const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags); int codePoint = BinaryFormat::getCodePointAndForwardPointer( binaryDictionaryInfo->getDictRoot(), &pos); @@ -110,50 +109,18 @@ namespace latinime { const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes( binaryDictionaryInfo->getDictRoot(), flags, pos); - if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) { - return siblingPos; - } - if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) { + if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { return siblingPos; } - childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, - probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints); + childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, attributesPos, + probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, + mergedNodeCodePointCount, mergedNodeCodePoints); return siblingPos; } -/* static */ bool DicNodeUtils::isDicNodeFilteredOut(const int nodeCodePoint, - const ProximityInfo *const pInfo, const std::vector<int> *const codePointsFilter) { - const int filterSize = codePointsFilter ? codePointsFilter->size() : 0; - if (filterSize <= 0) { - return false; - } - if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX - || CharUtils::isIntentionalOmissionCodePoint(nodeCodePoint))) { - // If normalized nodeCodePoint is not on the keyboard or skippable, this child is never - // filtered. - return false; - } - const int lowerCodePoint = CharUtils::toLowerCase(nodeCodePoint); - const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint); - // TODO: Avoid linear search - for (int i = 0; i < filterSize; ++i) { - // Checking if a normalized code point is in filter characters when pInfo is not - // null. When pInfo is null, nodeCodePoint is used to check filtering without - // normalizing. - if ((pInfo && ((*codePointsFilter)[i] == lowerCodePoint - || (*codePointsFilter)[i] == baseLowerCodePoint)) - || (!pInfo && (*codePointsFilter)[i] == nodeCodePoint)) { - return false; - } - } - return true; -} - /* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode, const BinaryDictionaryInfo *const binaryDictionaryInfo, - const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, - const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo, - DicNodeVector *childDicNodes) { + const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) { if (!dicNode->hasChildren()) { return; } @@ -161,14 +128,8 @@ namespace latinime { const int childCount = BinaryFormat::getGroupCountAndForwardPointer( binaryDictionaryInfo->getDictRoot(), &nextPos); for (int i = 0; i < childCount; i++) { - const int filterSize = codePointsFilter ? codePointsFilter->size() : 0; nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, - pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo, - childDicNodes); - if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) { - // All code points have been found. - break; - } + childrenFilter, childDicNodes); } } @@ -184,13 +145,12 @@ namespace latinime { if (dicNode->isTotalInputSizeExceedingLimit()) { return; } + const DicNodeProximityFilter childrenFilter(pInfoState, pointIndex, exactOnly); if (!dicNode->isLeavingNode()) { - DicNodeUtils::createAndGetPassingChildNode(dicNode, pInfoState, pointIndex, exactOnly, - childDicNodes); + DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); } else { DicNodeUtils::createAndGetAllLeavingChildNodes( - dicNode, binaryDictionaryInfo, pInfoState, pointIndex, exactOnly, - 0 /* codePointsFilter */, 0 /* pInfo */, childDicNodes); + dicNode, binaryDictionaryInfo, &childrenFilter, childDicNodes); } } @@ -230,23 +190,6 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } -/////////////////////////////////////// -// Bigram / Unigram dictionary utils // -/////////////////////////////////////// - -/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, - const int pointIndex, const bool exactOnly, const int nodeCodePoint) { - if (!pInfoState) { - return true; - } - if (exactOnly) { - return pInfoState->getPrimaryCodePointAt(pointIndex) == nodeCodePoint; - } - const ProximityType matchedId = pInfoState->getProximityType(pointIndex, nodeCodePoint, - true /* checkProximityChars */); - return isProximityChar(matchedId); -} - //////////////// // Char utils // //////////////// diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index d526975ce..7b567b582 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -18,7 +18,6 @@ #define LATINIME_DIC_NODE_UTILS_H #include <stdint.h> -#include <vector> #include "defines.h" @@ -26,8 +25,8 @@ namespace latinime { class BinaryDictionaryInfo; class DicNode; +class DicNodeProximityFilter; class DicNodeVector; -class ProximityInfo; class ProximityInfoState; class MultiBigramMap; @@ -44,19 +43,12 @@ class DicNodeUtils { const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes); static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo, const DicNode *const node, MultiBigramMap *const multiBigramMap); - static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo, - const std::vector<int> *const codePointsFilter); // TODO: Move to private static void getProximityChildDicNodes(DicNode *dicNode, const BinaryDictionaryInfo *const binaryDictionaryInfo, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes); - // TODO: Move to proximity info - static bool isProximityChar(ProximityType type) { - return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR; - } - private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); // Max number of bigrams to look up @@ -64,22 +56,14 @@ class DicNodeUtils { static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, const DicNode *const node, MultiBigramMap *multiBigramMap); - static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState, - const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes); + static void createAndGetPassingChildNode(DicNode *dicNode, + const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); static void createAndGetAllLeavingChildNodes(DicNode *dicNode, const BinaryDictionaryInfo *const binaryDictionaryInfo, - const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, - const std::vector<int> *const codePointsFilter, - const ProximityInfo *const pInfo, DicNodeVector *childDicNodes); + const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); static int createAndGetLeavingChildNode(DicNode *dicNode, int pos, const BinaryDictionaryInfo *const binaryDictionaryInfo, - const ProximityInfoState *pInfoState, const int pointIndex, - const bool exactOnly, const std::vector<int> *const codePointsFilter, - const ProximityInfo *const pInfo, DicNodeVector *childDicNodes); - - // TODO: Move to proximity info - static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex, - const bool exactOnly, const int nodeCodePoint); + const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 9641cc19c..5ac4eeaf4 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -62,14 +62,15 @@ class DicNodeVector { mDicNodes.back().initAsPassingChild(dicNode); } - void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags, - const int childrenPos, const int attributesPos, const int probability, - const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos, + const int attributesPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability, - isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints); + mDicNodes.back().initAsChild(dicNode, pos, childrenPos, attributesPos, probability, + isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + mergedNodeCodePoints); } DicNode *operator[](const int id) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp index bbb4ca3f0..5d14a0554 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp @@ -27,17 +27,13 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4; /** * Format versions */ -// Originally, format version 1 had a 16-bit magic number, then the version number `01' -// then options that must be 0. Hence the first 32-bits of the format are always as follow -// and it's okay to consider them a magic number as a whole. -const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100; // The versions of Latin IME that only handle format version 1 only test for the magic // number, so we had to change it so that version 2 files would be rejected by older // implementations. On this occasion, we made the magic number 32 bits long. -const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; +const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 -const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; +const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; /* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict, @@ -50,31 +46,28 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; } const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { - case FORMAT_VERSION_1_MAGIC_NUMBER: - // Format 1 header is exactly 5 bytes long and looks like: - // Magic number (2 bytes) 0x78 0xB1 - // Version number (1 byte) 0x01 - // Options (2 bytes) must be 0x00 0x00 - return VERSION_1; - case FORMAT_VERSION_2_MAGIC_NUMBER: - // Version 2 dictionaries are at least 12 bytes long. - // If this dictionary has the version 2 magic number but is less than 12 bytes long, - // then it's an unknown format and we need to avoid confidently reading the next bytes. - if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) { + case HEADER_VERSION_2_MAGIC_NUMBER: + // Version 2 header are at least 12 bytes long. + // If this header has the version 2 magic number but is less than 12 bytes long, + // then it's an unknown format and we need to avoid confidently reading the next bytes. + if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) { + return UNKNOWN_VERSION; + } + // Version 2 header is as follows: + // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE + // Version number (2 bytes) + // Options (2 bytes) + // Header size (4 bytes) : integer, big endian + if (ByteArrayUtils::readUint16(dict, 4) == 2) { + return VERSION_2; + } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { + // TODO: Support version 3 dictionary. + return UNKNOWN_VERSION; + } else { + return UNKNOWN_VERSION; + } + default: return UNKNOWN_VERSION; - } - // Format 2 header is as follows: - // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE - // Version number (2 bytes) 0x00 0x02 - // Options (2 bytes) - // Header size (4 bytes) : integer, big endian - if (ByteArrayUtils::readUint16(dict, 4) == 2) { - return VERSION_2; - } else { - return UNKNOWN_VERSION; - } - default: - return UNKNOWN_VERSION; } } diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h index 33618b9f0..830684c70 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h @@ -33,10 +33,9 @@ namespace latinime { */ class BinaryDictionaryFormatUtils { public: - // TODO: Remove obsolete version logic enum FORMAT_VERSION { - VERSION_1, VERSION_2, + VERSION_3, UNKNOWN_VERSION }; @@ -46,9 +45,8 @@ class BinaryDictionaryFormatUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils); static const int DICTIONARY_MINIMUM_SIZE; - static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER; - static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER; - static const int FORMAT_VERSION_2_MINIMUM_SIZE; + static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; + static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime #endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h index 6dba0b266..240512bce 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h @@ -53,6 +53,20 @@ class BinaryDictionaryHeader { return mMultiWordCostMultiplier; } + AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key, + int *outValue, int outValueSize) const { + if (outValueSize <= 0) return; + if (outValueSize == 1) { + outValue[0] = '\0'; + return; + } + if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo, + key, outValue, outValueSize)) { + outValue[0] = '?'; + outValue[1] = '\0'; + } + } + private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader); diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp index 2c9593144..a57b0f859 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp @@ -26,12 +26,10 @@ namespace latinime { const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; -const int BinaryDictionaryHeaderReadingUtils::FORMAT_VERSION_1_HEADER_SIZE = 5; - -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4; const BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; @@ -47,15 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { - case BinaryDictionaryFormatUtils::VERSION_1: - return FORMAT_VERSION_1_HEADER_SIZE; - case BinaryDictionaryFormatUtils::VERSION_2: + switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { + case HEADER_VERSION_2: // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), - VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE - + VERSION_2_DICTIONARY_FLAG_SIZE); + VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + + VERSION_2_HEADER_FLAG_SIZE); default: return S_INT_MAX; } @@ -64,12 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::getFlags( const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { - case BinaryDictionaryFormatUtils::VERSION_1: - return NO_FLAGS; - case BinaryDictionaryFormatUtils::VERSION_2: + switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { + case HEADER_VERSION_2: return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), - VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE); + VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); default: return NO_FLAGS; } @@ -79,17 +73,23 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key, int *outValue, const int outValueSize) { - if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) { + if (outValueSize <= 0) { return false; } const int headerSize = getHeaderSize(binaryDictionaryInfo); int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); + if (pos == NOT_A_DICT_POS) { + // The header doesn't have header options. + return false; + } while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { // The key was found. - ByteArrayUtils::readStringAndAdvancePosition( + const int length = ByteArrayUtils::readStringAndAdvancePosition( binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos); + // Add a 0 terminator to the string. + outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; return true; } ByteArrayUtils::advancePositionToBehindString( diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h index 49ed2b9cc..61748227e 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h @@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils { return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; } - static AK_FORCE_INLINE bool hasHeaderAttributes( - const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { - // Only format 2 and above have header attributes as {key,value} string pairs. - switch (format) { - case BinaryDictionaryFormatUtils::VERSION_2: - return true; - break; - default: - return false; - } - } - static AK_FORCE_INLINE int getHeaderOptionsPosition( - const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { - switch (format) { - case BinaryDictionaryFormatUtils::VERSION_2: - return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE - + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { + switch (getHeaderVersion(dictionaryFormat)) { + case HEADER_VERSION_2: + return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; break; default: - return 0; + return NOT_A_DICT_POS; } } @@ -82,12 +70,15 @@ class BinaryDictionaryHeaderReadingUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); - static const int FORMAT_VERSION_1_HEADER_SIZE; + enum HEADER_VERSION { + HEADER_VERSION_2, + UNKNOWN_HEADER_VERSION + }; - static const int VERSION_2_MAGIC_NUMBER_SIZE; - static const int VERSION_2_DICTIONARY_VERSION_SIZE; - static const int VERSION_2_DICTIONARY_FLAG_SIZE; - static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; + static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; + static const int VERSION_2_HEADER_FLAG_SIZE; + static const int VERSION_2_HEADER_SIZE_FIELD_SIZE; static const DictionaryFlags NO_FLAGS; // Flags for special processing @@ -97,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils { static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static HEADER_VERSION getHeaderVersion( + const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) { + switch(formatVersion) { + case BinaryDictionaryFormatUtils::VERSION_2: + // Fall through + case BinaryDictionaryFormatUtils::VERSION_3: + return HEADER_VERSION_2; + default: + return UNKNOWN_HEADER_VERSION; + } + } }; } #endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index 7cb31440a..cbea18f90 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -20,23 +20,27 @@ #include <stdint.h> #include "defines.h" +#include "jni.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" +#include "utils/log_utils.h" namespace latinime { class BinaryDictionaryInfo { public: - BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd, - const int dictBufOffset, const bool isUpdatable) + AK_FORCE_INLINE BinaryDictionaryInfo(JNIEnv *env, const uint8_t *const dictBuf, + const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable) : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd), mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictBuf, mDictSize)), mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( - mDictionaryFormat)) {} + mDictionaryFormat)) { + logDictionaryInfo(env); + } AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -88,6 +92,33 @@ class BinaryDictionaryInfo { const BinaryDictionaryHeader mDictionaryHeader; const uint8_t *const mDictRoot; const DictionaryStructurePolicy *const mStructurePolicy; + + AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { + const int BUFFER_SIZE = 16; + int dictionaryIdCodePointBuffer[BUFFER_SIZE]; + int versionStringCodePointBuffer[BUFFER_SIZE]; + int dateStringCodePointBuffer[BUFFER_SIZE]; + mDictionaryHeader.readHeaderValueOrQuestionMark("dictionary", + dictionaryIdCodePointBuffer, BUFFER_SIZE); + mDictionaryHeader.readHeaderValueOrQuestionMark("version", + versionStringCodePointBuffer, BUFFER_SIZE); + mDictionaryHeader.readHeaderValueOrQuestionMark("date", + dateStringCodePointBuffer, BUFFER_SIZE); + + char dictionaryIdCharBuffer[BUFFER_SIZE]; + char versionStringCharBuffer[BUFFER_SIZE]; + char dateStringCharBuffer[BUFFER_SIZE]; + intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, + dictionaryIdCharBuffer, BUFFER_SIZE); + intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, + versionStringCharBuffer, BUFFER_SIZE); + intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, + dateStringCharBuffer, BUFFER_SIZE); + + LogUtils::logToJava(env, + "Dictionary info: dictionary = %s ; version = %s ; date = %s ; filesize = %i", + dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer, mDictSize); + } }; } #endif /* LATINIME_BINARY_DICTIONARY_INFO_H */ diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 675b54972..f520a75b1 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -22,6 +22,7 @@ #include <stdint.h> #include "defines.h" +#include "jni.h" #include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/session/dic_traverse_session.h" @@ -32,8 +33,9 @@ namespace latinime { -Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable) - : mBinaryDictionaryInfo(static_cast<const uint8_t *>(dict), dictSize, mmapFd, +Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, + int dictBufOffset, bool isUpdatable) + : mBinaryDictionaryInfo(env, static_cast<const uint8_t *>(dict), dictSize, mmapFd, dictBufOffset, isUpdatable), mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 94579c200..1bf24a85b 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -20,6 +20,7 @@ #include <stdint.h> #include "defines.h" +#include "jni.h" #include "suggest/core/dictionary/binary_dictionary_info.h" namespace latinime { @@ -52,7 +53,8 @@ class Dictionary { static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000; - Dictionary(void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable); + Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset, + bool isUpdatable); int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h index cec47081e..0da6504eb 100644 --- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h +++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h @@ -21,7 +21,6 @@ #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" -#include "suggest/core/dictionary/binary_format.h" namespace latinime { @@ -71,28 +70,23 @@ class TerminalAttributes { }; TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo, - const uint8_t nodeFlags, const int shortcutPos) - : mBinaryDictionaryInfo(binaryDictionaryInfo), - mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {} + const int shortcutPos) + : mBinaryDictionaryInfo(binaryDictionaryInfo), mShortcutListSizePos(shortcutPos) {} inline ShortcutIterator getShortcutIterator() const { - // The size of the shortcuts is stored here so that the whole shortcut chunk can be - // skipped quickly, so we ignore it. int shortcutPos = mShortcutListSizePos; - BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer( - mBinaryDictionaryInfo, &shortcutPos); - const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS); + const bool hasShortcutList = shortcutPos != NOT_A_DICT_POS; + if (hasShortcutList) { + BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer( + mBinaryDictionaryInfo, &shortcutPos); + } + // shortcutPos is never used if hasShortcutList is false. return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList); } - bool isBlacklistedOrNotAWord() const { - return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags); - } - private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); const BinaryDictionaryInfo *const mBinaryDictionaryInfo; - const uint8_t mNodeFlags; const int mShortcutListSizePos; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h index 54f7539d1..0e28560fc 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_utils.h +++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h @@ -117,6 +117,10 @@ class ProximityInfoUtils { return getSquaredDistanceFloat(x, y, projectionX, projectionY); } + static AK_FORCE_INLINE bool isMatchOrProximityChar(const ProximityType type) { + return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR; + } + // Normal distribution N(u, sigma^2). struct NormalDistribution { public: diff --git a/native/jni/src/suggest/core/policy/traversal.h b/native/jni/src/suggest/core/policy/traversal.h index c6f66f231..f26d7149e 100644 --- a/native/jni/src/suggest/core/policy/traversal.h +++ b/native/jni/src/suggest/core/policy/traversal.h @@ -45,7 +45,7 @@ class Traversal { const DicNode *const dicNode) const = 0; virtual bool needsToTraverseAllUserInput() const = 0; virtual float getMaxSpatialDistance() const = 0; - virtual bool allowPartialCommit() const = 0; + virtual bool autoCorrectsToMultiWordSuggestionIfTop() const = 0; virtual int getDefaultExpandDicNodeSize() const = 0; virtual int getMaxCacheSize() const = 0; virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession, diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 173a612be..c6da6f003 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -36,7 +36,6 @@ namespace latinime { const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; -const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1; /** * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates @@ -85,9 +84,9 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo if (!traverseSession->getProximityInfoState(0)->isUsed()) { return; } - if (TRAVERSAL->allowPartialCommit()) { - commitPoint = 0; - } + + // Never auto partial commit for now. + commitPoint = 0; if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE && traverseSession->isContinuousSuggestionPossible()) { @@ -149,8 +148,17 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen &doubleLetterTerminalIndex, &doubleLetterLevel); int maxScore = S_INT_MIN; - int bestExactMatchedNodeTerminalIndex = -1; - int bestExactMatchedNodeOutputWordIndex = -1; + // Force autocorrection for obvious long multi-word suggestions when the top suggestion is + // a long multiple words suggestion. + // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. + // traverseSession->isPartiallyCommited() always returns false because we never auto partial + // commit for now. + const bool forceCommitMultiWords = (terminalSize > 0) ? + TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop() + && (traverseSession->isPartiallyCommited() + || (traverseSession->getInputSize() + >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT + && terminals[0].hasMultipleWords())) : false; // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; ++terminalIndex) { @@ -162,8 +170,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost; - const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), - terminalDicNode->getFlags(), terminalDicNode->getAttributesPos()); const bool isPossiblyOffensiveWord = terminalDicNode->getProbability() <= 0; const bool isExactMatch = terminalDicNode->isExactMatch(); const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); @@ -176,60 +182,43 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); // Entries that are blacklisted or do not represent a word should not be output. - const bool isValidWord = !terminalAttributes.isBlacklistedOrNotAWord(); + const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); // Increase output score of top typing suggestion to ensure autocorrection. // TODO: Better integration with java side autocorrection logic. - // Force autocorrection for obvious long multi-word suggestions. - const bool isForceCommitMultiWords = TRAVERSAL->allowPartialCommit() - && (traverseSession->isPartiallyCommited() - || (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT - && terminalDicNode->hasMultipleWords())); - const int finalScore = SCORING->calculateFinalScore( compoundDistance, traverseSession->getInputSize(), - isForceCommitMultiWords || (isValidWord && SCORING->doesAutoCorrectValidWord())); + terminalDicNode->isExactMatch() + || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) + || (isValidWord && SCORING->doesAutoCorrectValidWord())); maxScore = max(maxScore, finalScore); - if (TRAVERSAL->allowPartialCommit()) { - // Index for top typing suggestion should be 0. - if (isValidWord && outputWordIndex == 0) { - terminalDicNode->outputSpacePositionsResult(spaceIndices); - } + // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. + // Index for top typing suggestion should be 0. + if (isValidWord && outputWordIndex == 0) { + terminalDicNode->outputSpacePositionsResult(spaceIndices); } // Don't output invalid words. However, we still need to submit their shortcuts if any. if (isValidWord) { outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; frequencies[outputWordIndex] = finalScore; - if (isSafeExactMatch) { - // Demote exact matches that are not the highest probable node among all exact - // matches. - const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0 - || terminals[bestExactMatchedNodeTerminalIndex].getProbability() - < terminalDicNode->getProbability(); - const int outputWordIndexToBeDemoted = isBestTerminal ? - bestExactMatchedNodeOutputWordIndex : outputWordIndex; - if (outputWordIndexToBeDemoted >= 0) { - frequencies[outputWordIndexToBeDemoted] -= - FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; - } - if (isBestTerminal) { - // Updates the best exact matched node index. - bestExactMatchedNodeTerminalIndex = terminalIndex; - // Updates the best exact matched output word index. - bestExactMatchedNodeOutputWordIndex = outputWordIndex; - } - } // Populate the outputChars array with the suggested word. const int startIndex = outputWordIndex * MAX_WORD_LENGTH; terminalDicNode->outputResult(&outputCodePoints[startIndex]); ++outputWordIndex; } - const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); - outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, - finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); + if (!terminalDicNode->hasMultipleWords()) { + const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), + terminalDicNode->getAttributesPos()); + // Shortcut is not supported for multiple words suggestions. + // TODO: Check shortcuts during traversal for multiple words suggestions. + const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); + outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, + finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); + + } DicNode::managedDelete(terminalDicNode); } diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 752bde9ac..875cbe4e0 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -82,8 +82,6 @@ class Suggest : public SuggestInterface { // Threshold for autocorrection classifier static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; - // Final score penalty to exact match words that are not the most probable exact match. - static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; const Traversal *const TRAVERSAL; const Scoring *const SCORING; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h index 5070651cb..70dad67e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h @@ -30,10 +30,11 @@ class DictionaryStructurePolicyFactory { static const DictionaryStructurePolicy *getDictionaryStructurePolicy( const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { switch (dictionaryFormat) { - case BinaryDictionaryFormatUtils::VERSION_1: - // Fall through case BinaryDictionaryFormatUtils::VERSION_2: return PatriciaTriePolicy::getInstance(); + case BinaryDictionaryFormatUtils::VERSION_3: + // TODO: support version 3 dictionaries. + return 0; default: ASSERT(false); return 0; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 5ae396e64..ef144e00a 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -23,6 +23,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/layout/proximity_info_state.h" +#include "suggest/core/layout/proximity_info_utils.h" #include "suggest/core/policy/traversal.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/policyimpl/typing/scoring_params.h" @@ -136,7 +137,7 @@ class TypingTraversal : public Traversal { return ScoringParams::MAX_SPATIAL_DISTANCE; } - AK_FORCE_INLINE bool allowPartialCommit() const { + AK_FORCE_INLINE bool autoCorrectsToMultiWordSuggestionIfTop() const { return true; } @@ -159,7 +160,7 @@ class TypingTraversal : public Traversal { const DicNode *const dicNode) const { const ProximityType proximityType = getProximityType(traverseSession, parentDicNode, dicNode); - if (!DicNodeUtils::isProximityChar(proximityType)) { + if (!ProximityInfoUtils::isMatchOrProximityChar(proximityType)) { return false; } return true; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index e098f353e..830aa80de 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -169,12 +169,7 @@ class TypingWeighting : public Weighting { float getTerminalLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, const float dicNodeLanguageImprobability) const { - // We promote exact matches here to prevent them from being pruned. The final score of - // exact match nodes might be demoted later in Suggest::outputSuggestions if there are - // multiple exact matches. - const float languageImprobability = (dicNode->isExactMatch()) ? - 0.0f : dicNodeLanguageImprobability; - return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; + return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const { diff --git a/native/jni/src/utils/log_utils.cpp b/native/jni/src/utils/log_utils.cpp new file mode 100644 index 000000000..5ab2b2862 --- /dev/null +++ b/native/jni/src/utils/log_utils.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "log_utils.h" + +#include <cstdio> +#include <stdarg.h> + +#include "defines.h" + +namespace latinime { + /* static */ void LogUtils::logToJava(JNIEnv *const env, const char *const format, ...) { + static const char *TAG = "LatinIME:LogUtils"; + const jclass androidUtilLogClass = env->FindClass("android/util/Log"); + if (!androidUtilLogClass) { + // If we can't find the class, we are probably in off-device testing, and + // it's expected. Regardless, logging is not essential to functionality, so + // we should just return. However, FindClass has thrown an exception behind + // our back and there is no way to prevent it from doing that, so we clear + // the exception before we return. + env->ExceptionClear(); + return; + } + const jmethodID logDotIMethodId = env->GetStaticMethodID(androidUtilLogClass, "i", + "(Ljava/lang/String;Ljava/lang/String;)I"); + if (!logDotIMethodId) { + env->ExceptionClear(); + if (androidUtilLogClass) env->DeleteLocalRef(androidUtilLogClass); + return; + } + const jstring javaTag = env->NewStringUTF(TAG); + + static const int DEFAULT_LINE_SIZE = 128; + char fixedSizeCString[DEFAULT_LINE_SIZE]; + va_list argList; + va_start(argList, format); + // Get the necessary size. Add 1 for the 0 terminator. + const int size = vsnprintf(fixedSizeCString, DEFAULT_LINE_SIZE, format, argList) + 1; + va_end(argList); + + jstring javaString; + if (size <= DEFAULT_LINE_SIZE) { + // The buffer was large enough. + javaString = env->NewStringUTF(fixedSizeCString); + } else { + // The buffer was not large enough. + va_start(argList, format); + char variableSizeCString[size]; + vsnprintf(variableSizeCString, size, format, argList); + va_end(argList); + javaString = env->NewStringUTF(variableSizeCString); + } + + env->CallStaticIntMethod(androidUtilLogClass, logDotIMethodId, javaTag, javaString); + if (javaString) env->DeleteLocalRef(javaString); + if (javaTag) env->DeleteLocalRef(javaTag); + if (androidUtilLogClass) env->DeleteLocalRef(androidUtilLogClass); + } +} diff --git a/native/jni/src/utils/log_utils.h b/native/jni/src/utils/log_utils.h new file mode 100644 index 000000000..6ac16d91a --- /dev/null +++ b/native/jni/src/utils/log_utils.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_LOG_UTILS_H +#define LATINIME_LOG_UTILS_H + +#include "defines.h" +#include "jni.h" + +namespace latinime { + +class LogUtils { + public: + static void logToJava(JNIEnv *const env, const char *const format, ...) +#ifdef __GNUC__ + __attribute__ ((format (printf, 2, 3))) +#endif // __GNUC__ + ; + + private: + DISALLOW_COPY_AND_ASSIGN(LogUtils); +}; +} // namespace latinime +#endif // LATINIME_LOG_UTILS_H |