diff options
author | 2012-09-18 12:07:33 +0900 | |
---|---|---|
committer | 2012-09-18 12:07:33 +0900 | |
commit | f18fc03621b70f5a51cf54c4bf40eb213de40652 (patch) | |
tree | e6897145da33c21866b57c95444a530488ca2af1 /native/jni/src | |
parent | 9761fa578609b4f3788344b5b3c886b1e883e97e (diff) | |
parent | 764dd712032d7b8012797b1116b523bef7b907f3 (diff) | |
download | latinime-f18fc03621b70f5a51cf54c4bf40eb213de40652.tar.gz latinime-f18fc03621b70f5a51cf54c4bf40eb213de40652.tar.xz latinime-f18fc03621b70f5a51cf54c4bf40eb213de40652.zip |
Merge remote-tracking branch 'goog/jb-mr1-dev' into mergescriptpackage
Diffstat (limited to 'native/jni/src')
-rw-r--r-- | native/jni/src/additional_proximity_chars.h | 4 | ||||
-rw-r--r-- | native/jni/src/basechars.cpp | 4 | ||||
-rw-r--r-- | native/jni/src/bigram_dictionary.cpp | 10 | ||||
-rw-r--r-- | native/jni/src/binary_format.h | 50 | ||||
-rw-r--r-- | native/jni/src/bloom_filter.h | 14 | ||||
-rw-r--r-- | native/jni/src/char_utils.h | 3 | ||||
-rw-r--r-- | native/jni/src/correction.cpp | 20 | ||||
-rw-r--r-- | native/jni/src/debug.h | 71 | ||||
-rw-r--r-- | native/jni/src/defines.h | 104 | ||||
-rw-r--r-- | native/jni/src/dictionary.h | 9 | ||||
-rw-r--r-- | native/jni/src/geometry_utils.h | 21 | ||||
-rw-r--r-- | native/jni/src/proximity_info.cpp | 82 | ||||
-rw-r--r-- | native/jni/src/proximity_info.h | 40 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state.cpp | 387 | ||||
-rw-r--r-- | native/jni/src/proximity_info_state.h | 59 | ||||
-rw-r--r-- | native/jni/src/terminal_attributes.h | 14 | ||||
-rw-r--r-- | native/jni/src/unigram_dictionary.cpp | 104 | ||||
-rw-r--r-- | native/jni/src/unigram_dictionary.h | 12 | ||||
-rw-r--r-- | native/jni/src/words_priority_queue_pool.h | 2 |
19 files changed, 683 insertions, 327 deletions
diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h index 1fe996d0d..d420c4664 100644 --- a/native/jni/src/additional_proximity_chars.h +++ b/native/jni/src/additional_proximity_chars.h @@ -50,7 +50,7 @@ class AdditionalProximityChars { if (!isEnLocale(localeStr)) { return 0; } - switch(c) { + switch (c) { case 'a': return EN_US_ADDITIONAL_A_SIZE; case 'e': @@ -70,7 +70,7 @@ class AdditionalProximityChars { if (!isEnLocale(localeStr)) { return 0; } - switch(c) { + switch (c) { case 'a': return EN_US_ADDITIONAL_A; case 'e': diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp index c91e5f741..379cb6226 100644 --- a/native/jni/src/basechars.cpp +++ b/native/jni/src/basechars.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include <stdint.h> + #include "char_utils.h" namespace latinime { @@ -24,7 +26,7 @@ namespace latinime { * if c is not a combined character, or the base character if it * is combined. */ -const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { +const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index f1d538095..dade4f16b 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -113,7 +113,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in } // If still no bigrams, we really don't have them! if (0 == pos) return 0; - int bigramFlags; + uint8_t bigramFlags; int bigramCount = 0; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); @@ -153,10 +153,10 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, forceLowerCaseSearch); if (NOT_VALID_WORD == pos) return 0; - const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { - BinaryFormat::getCharCodeAndForwardPointer(root, &pos); + BinaryFormat::getCodePointAndForwardPointer(root, &pos); } else { pos = BinaryFormat::skipOtherCharacters(root, pos); } @@ -179,7 +179,7 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p } if (0 == pos) return; - int bigramFlags; + uint8_t bigramFlags; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const int frequency = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; @@ -215,7 +215,7 @@ bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const in int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == nextWordPos) return false; - int bigramFlags; + uint8_t bigramFlags; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index d8f3e83dd..eec52e323 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -43,6 +43,10 @@ class BinaryFormat { static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; + // Flag for non-words (typically, shortcut only entries) + static const int FLAG_IS_NOT_A_WORD = 0x02; + // Flag for blacklist + static const int FLAG_IS_BLACKLISTED = 0x01; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes @@ -80,7 +84,7 @@ class BinaryFormat { static unsigned int getFlags(const uint8_t *const dict); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); - static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos); + static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); @@ -172,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict return dict[(*pos)++]; } -inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) { +inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { const int origin = *pos; - const int32_t character = dict[origin]; - if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { - if (character == CHARACTER_ARRAY_TERMINATOR) { + const int32_t codePoint = dict[origin]; + if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { + if (codePoint == CHARACTER_ARRAY_TERMINATOR) { *pos = origin + 1; - return NOT_A_CHARACTER; + return NOT_A_CODE_POINT; } else { *pos = origin + 3; - const int32_t char_1 = character << 16; + const int32_t char_1 = codePoint << 16; const int32_t char_2 = char_1 + (dict[origin + 1] << 8); return char_2 + dict[origin + 2]; } } else { *pos = origin + 1; - return character; + return codePoint; } } @@ -356,7 +360,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, while (true) { // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). - if (wordPos > length) return NOT_VALID_WORD; + if (wordPos >= length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { @@ -365,23 +369,23 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, if (0 >= charGroupCount) return NOT_VALID_WORD; const int charGroupPos = pos; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); + int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is // no match and we can return NOT_VALID_WORD. So we will check all the characters // in this character group indeed does match. if (FLAG_HAS_MULTIPLE_CHARS & flags) { - character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); - while (NOT_A_CHARACTER != character) { + character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + while (NOT_A_CODE_POINT != character) { ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this chargroup being the only one to // match the word on the first character, but not matching the whole word). - if (wordPos > length) return NOT_VALID_WORD; + if (wordPos >= length) return NOT_VALID_WORD; if (inWord[wordPos] != character) return NOT_VALID_WORD; - character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); + character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); } } // If we come here we know that so far, we do match. Either we are on a terminal @@ -453,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a --charGroupCount) { const int startPos = pos; const uint8_t flags = getFlagsAndForwardPointer(root, &pos); - const int32_t character = getCharCodeAndForwardPointer(root, &pos); + const int32_t character = getCodePointAndForwardPointer(root, &pos); if (address == startPos) { // We found the address. Copy the rest of the word in the buffer and return // the length. outWord[wordPos] = character; if (FLAG_HAS_MULTIPLE_CHARS & flags) { - int32_t nextChar = getCharCodeAndForwardPointer(root, &pos); + int32_t nextChar = getCodePointAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug int charCount = maxDepth; - while (NOT_A_CHARACTER != nextChar && --charCount > 0) { + while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; - nextChar = getCharCodeAndForwardPointer(root, &pos); + nextChar = getCodePointAndForwardPointer(root, &pos); } } *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos); @@ -519,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a const uint8_t lastFlags = getFlagsAndForwardPointer(root, &lastCandidateGroupPos); const int32_t lastChar = - getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); + getCodePointAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer outWord[wordPos] = lastChar; if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { int32_t nextChar = - getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); + getCodePointAndForwardPointer(root, &lastCandidateGroupPos); int charCount = maxDepth; while (-1 != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; - nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); + nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); } } ++wordPos; @@ -578,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const // 0 for the bigram frequency represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. // See makedict.BinaryDictInputOutput for details. - const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); - return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize); + const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); + return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize); } // This returns a probability in log space. diff --git a/native/jni/src/bloom_filter.h b/native/jni/src/bloom_filter.h index 47177dcba..bcce1f7ea 100644 --- a/native/jni/src/bloom_filter.h +++ b/native/jni/src/bloom_filter.h @@ -23,14 +23,16 @@ namespace latinime { -static inline void setInFilter(uint8_t *filter, const int position) { - const unsigned int bucket = position % BIGRAM_FILTER_MODULO; - filter[bucket >> 3] |= (1 << (bucket & 0x7)); +// TODO: uint32_t position +static inline void setInFilter(uint8_t *filter, const int32_t position) { + const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); + filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7)); } -static inline bool isInFilter(const uint8_t *filter, const int position) { - const unsigned int bucket = position % BIGRAM_FILTER_MODULO; - return filter[bucket >> 3] & (1 << (bucket & 0x7)); +// TODO: uint32_t position +static inline bool isInFilter(const uint8_t *filter, const int32_t position) { + const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); + return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7)); } } // namespace latinime #endif // LATINIME_BLOOM_FILTER_H diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index b30677fa7..b17f262ec 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -18,6 +18,7 @@ #define LATINIME_CHAR_UTILS_H #include <cctype> +#include <stdint.h> namespace latinime { @@ -43,7 +44,7 @@ unsigned short latin_tolower(const unsigned short c); */ static const int BASE_CHARS_SIZE = 0x0500; -extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; +extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE]; inline static unsigned short toBaseChar(unsigned short c) { if (c < BASE_CHARS_SIZE) { diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 9ad65b09d..49e3e3c8c 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -55,7 +55,7 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, } AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); - (void)c; + (void)c; // To suppress compiler warning } } } @@ -362,7 +362,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (mSkipPos >= 0) { if (mSkippedCount == 0 && mSkipPos < mOutputIndex) { if (DEBUG_DICT) { - assert(mSkipPos == mOutputIndex - 1); + // TODO: Enable this assertion. + //assert(mSkipPos == mOutputIndex - 1); } mSkipPos = mOutputIndex; } @@ -630,7 +631,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( inline static int getQuoteCount(const unsigned short *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { - if(word[i] == '\'') { + if (word[i] == SINGLE_QUOTE) { ++quoteCount; } } @@ -936,7 +937,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( int totalLength = 0; int totalFreq = 0; - for (int i = 0; i < wordCount; ++i){ + for (int i = 0; i < wordCount; ++i) { const int wordLength = wordLengthArray[i]; if (wordLength <= 0) { return 0; @@ -1126,15 +1127,16 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *be return 0; } - const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE - * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), - static_cast<float>(min(beforeLength, afterLength - spaceCount))) - * FULL_WORD_MULTIPLIER; + const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX) + : static_cast<float>(MAX_INITIAL_SCORE) + * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), + static_cast<float>(min(beforeLength, afterLength - spaceCount))) + * static_cast<float>(FULL_WORD_MULTIPLIER); // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, // so, 0 <= distance / afterLength <= 1 const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); - return (score / maxScore) * weight; + return (static_cast<float>(score) / maxScore) * weight; } } // namespace latinime diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h deleted file mode 100644 index 8f6b69d77..000000000 --- a/native/jni/src/debug.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2011, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DEBUG_H -#define LATINIME_DEBUG_H - -#include "defines.h" - -static inline unsigned char *convertToUnibyteString(unsigned short *input, unsigned char *output, - const unsigned int length) { - unsigned int i = 0; - for (; i < length && input[i] != 0; ++i) - output[i] = input[i] & 0xFF; - output[i] = 0; - return output; -} - -static inline unsigned char *convertToUnibyteStringAndReplaceLastChar(unsigned short *input, - unsigned char *output, const unsigned int length, unsigned char c) { - unsigned int i = 0; - for (; i < length && input[i] != 0; ++i) - output[i] = input[i] & 0xFF; - if (i > 0) output[i-1] = c; - output[i] = 0; - return output; -} - -static inline void LOGI_S16(unsigned short *string, const unsigned int length) { - unsigned char tmp_buffer[length]; - convertToUnibyteString(string, tmp_buffer, length); - AKLOGI(">> %s", tmp_buffer); - // The log facility is throwing out log that comes too fast. The following - // is a dirty way of slowing down processing so that we can see all log. - // TODO : refactor this in a blocking log or something. - // usleep(10); -} - -static inline void LOGI_S16_PLUS(unsigned short *string, const unsigned int length, - unsigned char c) { - unsigned char tmp_buffer[length+1]; - convertToUnibyteStringAndReplaceLastChar(string, tmp_buffer, length, c); - AKLOGI(">> %s", tmp_buffer); - // Likewise - // usleep(10); -} - -static inline void printDebug(const char *tag, int *codes, int codesSize, int MAX_PROXIMITY_CHARS) { - unsigned char *buf = static_cast<unsigned char *>(malloc((1 + codesSize) * sizeof(*buf))); - - buf[codesSize] = 0; - while (--codesSize >= 0) { - buf[codesSize] = static_cast<unsigned char>(codes[codesSize * MAX_PROXIMITY_CHARS]); - } - AKLOGI("%s, WORD = %s", tag, buf); - - free(buf); -} -#endif // LATINIME_DEBUG_H diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 9b530077a..ad526fb7f 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -17,6 +17,8 @@ #ifndef LATINIME_DEFINES_H #define LATINIME_DEFINES_H +#include <stdint.h> + #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #include <android/log.h> #ifndef LOG_TAG @@ -26,9 +28,12 @@ #define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ - dumpResult(words, frequencies, maxWordCount, maxWordLength); } while(0) -#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) -#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0) + dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) +#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) +#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) +// TODO: INTS_TO_CHARS +#define SHORTS_TO_CHARS(input, length, output) do { \ + shortArrayToCharArray(input, length, output); } while (0) static inline void dumpWordInfo(const unsigned short *word, const int length, const int rank, const int frequency) { @@ -39,7 +44,8 @@ static inline void dumpWordInfo(const unsigned short *word, const int length, if (c == 0) { break; } - charBuf[i] = c; + // static_cast only for debugging + charBuf[i] = static_cast<char>(c); } charBuf[i] = 0; if (i > 1) { @@ -65,7 +71,8 @@ static inline void dumpWord(const unsigned short *word, const int length) { if (c == 0) { break; } - charBuf[i] = c; + // static_cast only for debugging + charBuf[i] = static_cast<char>(c); } charBuf[i] = 0; if (i > 1) { @@ -83,12 +90,58 @@ static inline void dumpWordInt(const int *word, const int length) { AKLOGI("i[ %s ]", charBuf); } +// TODO: Change this to intArrayToCharArray +static inline void shortArrayToCharArray( + const unsigned short *input, const int length, char *output) { + int i = 0; + for (;i < length; ++i) { + const unsigned short c = input[i]; + if (c == 0) { + break; + } + // static_cast only for debugging + output[i] = static_cast<char>(c); + } + output[i] = 0; +} + +#ifndef __ANDROID__ +#include <cassert> +#include <execinfo.h> +#include <stdlib.h> + +#define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0) +#define SHOW_STACK_TRACE do { showStackTrace(); } while (0) + +static inline void showStackTrace() { + void *callstack[128]; + int i, frames = backtrace(callstack, 128); + char **strs = backtrace_symbols(callstack, frames); + for (i = 0; i < frames; ++i) { + if (i == 0) { + AKLOGI("=== Trace ==="); + continue; + } + AKLOGI("%s", strs[i]); + } + free(strs); +} +#else +#include <cassert> +#define ASSERT(success) assert(success) +#define SHOW_STACK_TRACE +#endif + #else #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) #define DUMP_WORD(word, length) #define DUMP_WORD_INT(word, length) +#define ASSERT(success) +#define SHOW_STACK_TRACE +// TODO: INTS_TO_CHARS +#define SHORTS_TO_CHARS(input, length, output) #endif #ifdef FLAG_DO_PROFILE @@ -102,14 +155,14 @@ static unsigned int profile_counter[PROF_BUF_SIZE]; #define PROF_RESET prof_reset() #define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id] -#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while(0) +#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0) #define PROF_START(prof_buf_id) do { \ - PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while(0) -#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0) + PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0) +#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0) #define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id]) #define PROF_CLOCKOUT(prof_buf_id) \ AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id])) -#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0) +#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0) static inline void prof_reset(void) { for (int i = 0; i < PROF_BUF_SIZE; ++i) { @@ -167,6 +220,12 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#ifdef FLAG_FULL_DBG +#define DEBUG_GEO_FULL true +#else +#define DEBUG_GEO_FULL false +#endif + #else // FLAG_DBG #define DEBUG_DICT false @@ -181,6 +240,8 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_GEO_FULL false + #endif // FLAG_DBG #ifndef U_SHORT_MAX @@ -210,15 +271,15 @@ static inline void prof_out(void) { #define FLAG_BIGRAM_FREQ 0x7F #define DICTIONARY_VERSION_MIN 200 -#define NOT_VALID_WORD -99 -#define NOT_A_CHARACTER -1 -#define NOT_A_DISTANCE -1 -#define NOT_A_COORDINATE -1 -#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2 -#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3 -#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4 -#define NOT_AN_INDEX -1 -#define NOT_A_PROBABILITY -1 +#define NOT_VALID_WORD (-99) +#define NOT_A_CODE_POINT (-1) +#define NOT_A_DISTANCE (-1) +#define NOT_A_COORDINATE (-1) +#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2) +#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3) +#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4) +#define NOT_AN_INDEX (-1) +#define NOT_A_PROBABILITY (-1) #define KEYCODE_SPACE ' ' @@ -297,6 +358,9 @@ static inline void prof_out(void) { // Max Distance between point to key #define MAX_POINT_TO_KEY_LENGTH 10000000 +// The max number of the keys in one keyboard layout +#define MAX_KEY_COUNT_IN_A_KEYBOARD 64 + // TODO: Reduce this constant if possible; check the maximum number of digraphs in the same // word in the dictionary for languages with digraphs, like German and French #define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5 @@ -329,8 +393,8 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; } #define NEUTRAL_AREA_RADIUS_RATIO 1.3f // DEBUG -#define INPUTLENGTH_FOR_DEBUG -1 -#define MIN_OUTPUT_INDEX_FOR_DEBUG -1 +#define INPUTLENGTH_FOR_DEBUG (-1) +#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ TypeName(const TypeName&); \ diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index e9a03ce55..a1358890d 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -90,11 +90,12 @@ class Dictionary { // static inline methods should be defined in the header file inline int Dictionary::wideStrLen(unsigned short *str) { if (!str) return 0; - unsigned short *end = str; - while (*end) { - end++; + int length = 0; + while (*str) { + str++; + length++; } - return end - str; + return length; } } // namespace latinime #endif // LATINIME_DICTIONARY_H diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h index f30e9fcc0..bad5eda61 100644 --- a/native/jni/src/geometry_utils.h +++ b/native/jni/src/geometry_utils.h @@ -25,14 +25,17 @@ #define M_PI_F 3.14159265f -namespace latinime { +#define ROUND_FLOAT_10000(f) ((f) < 1000.0f && (f) > 0.001f) \ + ? (floorf((f) * 10000.0f) / 10000.0f) : (f) -static inline float squareFloat(float x) { - return x * x; -} +#define SQUARE_FLOAT(x) ((x) * (x)) + +namespace latinime { static inline float getSquaredDistanceFloat(float x1, float y1, float x2, float y2) { - return squareFloat(x1 - x2) + squareFloat(y1 - y2); + const float deltaX = x1 - x2; + const float deltaY = y1 - y2; + return SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY); } static inline float getDistanceFloat(float x1, float y1, float x2, float y2) { @@ -52,9 +55,11 @@ static inline float getAngle(int x1, int y1, int x2, int y2) { } static inline float getAngleDiff(float a1, float a2) { - const float diff = fabsf(a1 - a2); + const float deltaA = fabsf(a1 - a2); + const float diff = ROUND_FLOAT_10000(deltaA); if (diff > M_PI_F) { - return 2.0f * M_PI_F - diff; + const float normalizedDiff = 2.0f * M_PI_F - diff; + return ROUND_FLOAT_10000(normalizedDiff); } return diff; } @@ -76,7 +81,7 @@ static inline float pointToLineSegSquaredDistanceFloat( const float ray2y = y2 - y1; const float dotProduct = ray1x * ray2x + ray1y * ray2y; - const float lineLengthSqr = squareFloat(ray2x) + squareFloat(ray2y); + const float lineLengthSqr = SQUARE_FLOAT(ray2x) + SQUARE_FLOAT(ray2y); const float projectionLengthSqr = dotProduct / lineLengthSqr; float projectionX; diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index e681f6f97..c9f83b62c 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -29,7 +29,6 @@ namespace latinime { -/* static */ const int ProximityInfo::NOT_A_CODE = -1; /* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f; static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len, @@ -63,11 +62,13 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth), CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight), KEY_COUNT(min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)), + KEYBOARD_WIDTH(keyboardWidth), KEYBOARD_HEIGHT(keyboardHeight), HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates && keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs && sweetSpotCenterYs && sweetSpotRadii), mProximityCharsArray(new int32_t[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE - /* proximityGridLength */]) { + /* proximityGridLength */]), + mCodeToKeyMap() { const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE; if (DEBUG_PROXIMITY_INFO) { AKLOGI("Create proximity info array %d", proximityGridLength); @@ -84,26 +85,13 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates); safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths); safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights); - safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCharCodes); + safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCodePoints); safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs); safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs); safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii); - initializeCodeToKeyIndex(); initializeG(); } -// Build the reversed look up table from the char code to the index in mKeyXCoordinates, -// mKeyYCoordinates, mKeyWidths, mKeyHeights, mKeyCharCodes. -void ProximityInfo::initializeCodeToKeyIndex() { - memset(mCodeToKeyIndex, -1, (MAX_CHAR_CODE + 1) * sizeof(mCodeToKeyIndex[0])); - for (int i = 0; i < KEY_COUNT; ++i) { - const int code = mKeyCharCodes[i]; - if (0 <= code && code <= MAX_CHAR_CODE) { - mCodeToKeyIndex[code] = i; - } - } -} - ProximityInfo::~ProximityInfo() { delete[] mProximityCharsArray; } @@ -117,7 +105,8 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { if (x < 0 || y < 0) { if (DEBUG_DICT) { AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y); - assert(false); + // TODO: Enable this assertion. + //assert(false); } return false; } @@ -140,13 +129,15 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float x2, float y2, float scale) { - return squareFloat((x1 - x2) / scale) + squareFloat((y1 - y2) / scale); + const float deltaX = x1 - x2; + const float deltaY = y1 - y2; + return (SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY)) / SQUARE_FLOAT(scale); } float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloat( const int keyId, const int x, const int y) const { - const float centerX = static_cast<float>(getKeyCenterXOfIdG(keyId)); - const float centerY = static_cast<float>(getKeyCenterYOfIdG(keyId)); + const float centerX = static_cast<float>(getKeyCenterXOfKeyIdG(keyId)); + const float centerY = static_cast<float>(getKeyCenterYOfKeyIdG(keyId)); const float touchX = static_cast<float>(x); const float touchY = static_cast<float>(y); const float keyWidth = static_cast<float>(getMostCommonKeyWidth()); @@ -178,7 +169,7 @@ void ProximityInfo::calculateNearbyKeyCodes( if (c < KEYCODE_SPACE || c == primaryKey) { continue; } - const int keyIndex = getKeyIndex(c); + const int keyIndex = getKeyIndexOf(c); const bool onKey = isOnKey(keyIndex, x, y); const int distance = squaredDistanceToEdge(keyIndex, x, y); if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) { @@ -208,7 +199,7 @@ void ProximityInfo::calculateNearbyKeyCodes( const int32_t ac = additionalProximityChars[j]; int k = 0; for (; k < insertPos; ++k) { - if ((int)ac == inputCodes[k]) { + if (static_cast<int>(ac) == inputCodes[k]) { break; } } @@ -227,42 +218,39 @@ void ProximityInfo::calculateNearbyKeyCodes( } // Add a delimiter for the proximity characters for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) { - inputCodes[i] = NOT_A_CODE; + inputCodes[i] = NOT_A_CODE_POINT; } } -int ProximityInfo::getKeyIndex(const int c) const { +int ProximityInfo::getKeyIndexOf(const int c) const { if (KEY_COUNT == 0) { // We do not have the coordinate data return NOT_AN_INDEX; } - const unsigned short baseLowerC = toBaseLowerCase(c); - if (baseLowerC > MAX_CHAR_CODE) { - return NOT_AN_INDEX; + const int baseLowerC = static_cast<int>(toBaseLowerCase(c)); + hash_map_compat<int, int>::const_iterator mapPos = mCodeToKeyMap.find(baseLowerC); + if (mapPos != mCodeToKeyMap.end()) { + return mapPos->second; } - return mCodeToKeyIndex[baseLowerC]; + return NOT_AN_INDEX; } -int ProximityInfo::getKeyCode(const int keyIndex) const { +int ProximityInfo::getCodePointOf(const int keyIndex) const { if (keyIndex < 0 || keyIndex >= KEY_COUNT) { - return NOT_AN_INDEX; + return NOT_A_CODE_POINT; } - return mKeyToCodeIndexG[keyIndex]; + return mKeyIndexToCodePointG[keyIndex]; } void ProximityInfo::initializeG() { // TODO: Optimize for (int i = 0; i < KEY_COUNT; ++i) { - const int code = mKeyCharCodes[i]; + const int code = mKeyCodePoints[i]; const int lowerCode = toBaseLowerCase(code); mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2; mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2; - if (code != lowerCode && lowerCode >= 0 && lowerCode <= MAX_CHAR_CODE) { - mCodeToKeyIndex[lowerCode] = i; - mKeyToCodeIndexG[i] = lowerCode; - } else { - mKeyToCodeIndexG[i] = code; - } + mCodeToKeyMap[lowerCode] = i; + mKeyIndexToCodePointG[i] = lowerCode; } for (int i = 0; i < KEY_COUNT; i++) { mKeyKeyDistancesG[i][i] = 0; @@ -274,22 +262,22 @@ void ProximityInfo::initializeG() { } } -float ProximityInfo::getKeyCenterXOfCharG(int charCode) const { - return getKeyCenterXOfIdG(getKeyIndex(charCode)); +int ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const { + return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode)); } -float ProximityInfo::getKeyCenterYOfCharG(int charCode) const { - return getKeyCenterYOfIdG(getKeyIndex(charCode)); +int ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const { + return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode)); } -float ProximityInfo::getKeyCenterXOfIdG(int keyId) const { +int ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const { if (keyId >= 0) { return mCenterXsG[keyId]; } return 0; } -float ProximityInfo::getKeyCenterYOfIdG(int keyId) const { +int ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const { if (keyId >= 0) { return mCenterYsG[keyId]; } @@ -297,11 +285,11 @@ float ProximityInfo::getKeyCenterYOfIdG(int keyId) const { } int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const { - const int keyId0 = getKeyIndex(key0); - const int keyId1 = getKeyIndex(key1); + const int keyId0 = getKeyIndexOf(key0); + const int keyId1 = getKeyIndexOf(key1); if (keyId0 >= 0 && keyId1 >= 0) { return mKeyKeyDistancesG[keyId0][keyId1]; } - return 0; + return MAX_POINT_TO_KEY_LENGTH; } } // namespace latinime diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index 822909b7a..0d8c6a3ca 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -20,12 +20,18 @@ #include <stdint.h> #include "defines.h" +#include "hash_map_compat.h" #include "jni.h" namespace latinime { class Correction; +inline bool isSkippableChar(const uint16_t character) { + // TODO: Do not hardcode here + return character == '\'' || character == '-'; +} + class ProximityInfo { public: ProximityInfo(JNIEnv *env, const jstring localeJStr, const int maxProximityCharsSize, @@ -41,8 +47,8 @@ class ProximityInfo { float getNormalizedSquaredDistanceFromCenterFloat( const int keyId, const int x, const int y) const; bool sameAsTyped(const unsigned short *word, int length) const; - int getKeyIndex(const int c) const; - int getKeyCode(const int keyIndex) const; + int getKeyIndexOf(const int c) const; + int getCodePointOf(const int keyIndex) const; bool hasSweetSpotData(const int keyIndex) const { // When there are no calibration data for a key, // the radius of the key is assigned to zero. @@ -96,23 +102,25 @@ class ProximityInfo { return GRID_HEIGHT; } - float getKeyCenterXOfCharG(int charCode) const; - float getKeyCenterYOfCharG(int charCode) const; - float getKeyCenterXOfIdG(int keyId) const; - float getKeyCenterYOfIdG(int keyId) const; + int getKeyboardWidth() const { + return KEYBOARD_WIDTH; + } + + int getKeyboardHeight() const { + return KEYBOARD_HEIGHT; + } + + int getKeyCenterXOfCodePointG(int charCode) const; + int getKeyCenterYOfCodePointG(int charCode) const; + int getKeyCenterXOfKeyIdG(int keyId) const; + int getKeyCenterYOfKeyIdG(int keyId) const; int getKeyKeyDistanceG(int key0, int key1) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); - // The max number of the keys in one keyboard layout - static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64; - // The upper limit of the char code in mCodeToKeyIndex - static const int MAX_CHAR_CODE = 127; - static const int NOT_A_CODE; static const float NOT_A_DISTANCE_FLOAT; int getStartIndexFromCoordinates(const int x, const int y) const; - void initializeCodeToKeyIndex(); void initializeG(); float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const; float calculateSquaredDistanceFromSweetSpotCenter( @@ -136,6 +144,8 @@ class ProximityInfo { const int CELL_WIDTH; const int CELL_HEIGHT; const int KEY_COUNT; + const int KEYBOARD_WIDTH; + const int KEYBOARD_HEIGHT; const bool HAS_TOUCH_POSITION_CORRECTION_DATA; char mLocaleStr[MAX_LOCALE_STRING_LENGTH]; int32_t *mProximityCharsArray; @@ -143,13 +153,13 @@ class ProximityInfo { int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD]; - int32_t mKeyCharCodes[MAX_KEY_COUNT_IN_A_KEYBOARD]; + int32_t mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; - int mCodeToKeyIndex[MAX_CHAR_CODE + 1]; + hash_map_compat<int, int> mCodeToKeyMap; - int mKeyToCodeIndexG[MAX_KEY_COUNT_IN_A_KEYBOARD]; + int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD]; diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index f01b81e8d..9b189183b 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -25,10 +25,25 @@ #include "proximity_info_state.h" namespace latinime { + +const int ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; +const int ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = + 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; +const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f; +const int ProximityInfoState::NOT_A_CODE = -1; + void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize, const int *const xCoordinates, const int *const yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric) { + + if (isGeometric) { + mIsContinuationPossible = checkAndReturnIsContinuationPossible( + inputSize, xCoordinates, yCoordinates, times); + } else { + mIsContinuationPossible = false; + } + mProximityInfo = proximityInfo; mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData(); mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare(); @@ -70,41 +85,114 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi /////////////////////// // Setup touch points + int pushTouchPointStartIndex = 0; + int lastSavedInputSize = 0; mMaxPointToKeyLength = maxPointToKeyLength; - mInputXs.clear(); - mInputYs.clear(); - mTimes.clear(); - mLengthCache.clear(); - mDistanceCache.clear(); - + if (mIsContinuationPossible && mInputIndice.size() > 1) { + // Just update difference. + // Two points prior is never skipped. Thus, we pop 2 input point data here. + pushTouchPointStartIndex = mInputIndice[mInputIndice.size() - 2]; + popInputData(); + popInputData(); + lastSavedInputSize = mInputXs.size(); + } else { + // Clear all data. + mInputXs.clear(); + mInputYs.clear(); + mTimes.clear(); + mInputIndice.clear(); + mLengthCache.clear(); + mDistanceCache.clear(); + mNearKeysVector.clear(); + } + if (DEBUG_GEO_FULL) { + AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d", + pushTouchPointStartIndex, lastSavedInputSize); + } mInputSize = 0; + if (xCoordinates && yCoordinates) { const bool proximityOnly = !isGeometric && (xCoordinates[0] < 0 || yCoordinates[0] < 0); - for (int i = 0; i < inputSize; ++i) { + int lastInputIndex = pushTouchPointStartIndex; + for (int i = lastInputIndex; i < inputSize; ++i) { + const int pid = pointerIds ? pointerIds[i] : 0; + if (pointerId == pid) { + lastInputIndex = i; + } + } + if (DEBUG_GEO_FULL) { + AKLOGI("Init ProximityInfoState: last input index = %d", lastInputIndex); + } + // Working space to save near keys distances for current, prev and prevprev input point. + NearKeysDistanceMap nearKeysDistances[3]; + // These pointers are swapped for each inputs points. + NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0]; + NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1]; + NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2]; + + for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) { // Assuming pointerId == 0 if pointerIds is null. const int pid = pointerIds ? pointerIds[i] : 0; + if (DEBUG_GEO_FULL) { + AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid); + } if (pointerId == pid) { const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i); const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i]; const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i]; const int time = times ? times[i] : -1; - if (pushTouchPoint(c, x, y, time, isGeometric)) { - ++mInputSize; + if (pushTouchPoint(i, c, x, y, time, isGeometric /* do sampling */, + i == lastInputIndex, currentNearKeysDistances, prevNearKeysDistances, + prevPrevNearKeysDistances)) { + // Previous point information was popped. + NearKeysDistanceMap *tmp = prevNearKeysDistances; + prevNearKeysDistances = currentNearKeysDistances; + currentNearKeysDistances = tmp; + } else { + NearKeysDistanceMap *tmp = prevPrevNearKeysDistances; + prevPrevNearKeysDistances = prevNearKeysDistances; + prevNearKeysDistances = currentNearKeysDistances; + currentNearKeysDistances = tmp; } } } + mInputSize = mInputXs.size(); } if (mInputSize > 0) { const int keyCount = mProximityInfo->getKeyCount(); + mNearKeysVector.resize(mInputSize); mDistanceCache.resize(mInputSize * keyCount); - for (int i = 0; i < mInputSize; ++i) { + for (int i = lastSavedInputSize; i < mInputSize; ++i) { + mNearKeysVector[i].reset(); + static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f; for (int k = 0; k < keyCount; ++k) { const int index = i * keyCount + k; const int x = mInputXs[i]; const int y = mInputYs[i]; - mDistanceCache[index] = + const float normalizedSquaredDistance = mProximityInfo->getNormalizedSquaredDistanceFromCenterFloat(k, x, y); + mDistanceCache[index] = normalizedSquaredDistance; + if (normalizedSquaredDistance < NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) { + mNearKeysVector[i].set(k, 1); + } + } + } + + static const float READ_FORWORD_LENGTH_SCALE = 0.95f; + const int readForwordLength = static_cast<int>( + hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight()) + * READ_FORWORD_LENGTH_SCALE); + for (int i = 0; i < mInputSize; ++i) { + if (DEBUG_GEO_FULL) { + AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i], + mTimes[i]); + } + for (int j = max(i + 1, lastSavedInputSize); j < mInputSize; ++j) { + if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) { + break; + } + mNearKeysVector[i] |= mNearKeysVector[j]; } } } @@ -135,7 +223,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi const int currentChar = proximityChars[j]; const float squaredDistance = hasInputCoordinates() ? calculateNormalizedSquaredDistance( - mProximityInfo->getKeyIndex(currentChar), i) : + mProximityInfo->getKeyIndexOf(currentChar), i) : NOT_A_DISTANCE_FLOAT; if (squaredDistance >= 0.0f) { mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = @@ -151,33 +239,211 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi } } } + + if (DEBUG_GEO_FULL) { + AKLOGI("ProximityState init finished: %d points out of %d", mInputSize, inputSize); + } } -bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, - const int time, const bool sample) { - const uint32_t size = mInputXs.size(); - // TODO: Should have a const variable for 10 - const int sampleRate = mProximityInfo->getMostCommonKeyWidth() / 10; - if (size > 0) { - const int dist = getDistanceInt(x, y, mInputXs[size - 1], mInputYs[size - 1]); - if (sample && dist < sampleRate) { +bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSize, + const int *const xCoordinates, const int *const yCoordinates, const int *const times) { + for (int i = 0; i < mInputSize; ++i) { + const int index = mInputIndice[i]; + if (index > inputSize || xCoordinates[index] != mInputXs[i] || + yCoordinates[index] != mInputYs[i] || times[index] != mTimes[i]) { return false; } - mLengthCache.push_back(mLengthCache[size - 1] + dist); - } else { - mLengthCache.push_back(0); } + return true; +} + +// Calculating point to key distance for all near keys and returning the distance between +// the given point and the nearest key position. +float ProximityInfoState::updateNearKeysDistances(const int x, const int y, + NearKeysDistanceMap *const currentNearKeysDistances) { + static const float NEAR_KEY_THRESHOLD = 4.0f; + + currentNearKeysDistances->clear(); + const int keyCount = mProximityInfo->getKeyCount(); + float nearestKeyDistance = mMaxPointToKeyLength; + for (int k = 0; k < keyCount; ++k) { + const float dist = mProximityInfo->getNormalizedSquaredDistanceFromCenterFloat(k, x, y); + if (dist < NEAR_KEY_THRESHOLD) { + currentNearKeysDistances->insert(std::pair<int, float>(k, dist)); + } + if (nearestKeyDistance > dist) { + nearestKeyDistance = dist; + } + } + return nearestKeyDistance; +} + +// Check if previous point is at local minimum position to near keys. +bool ProximityInfoState::isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const { + static const float MARGIN = 0.01f; + + for (NearKeysDistanceMap::const_iterator it = prevNearKeysDistances->begin(); + it != prevNearKeysDistances->end(); ++it) { + NearKeysDistanceMap::const_iterator itPP = prevPrevNearKeysDistances->find(it->first); + NearKeysDistanceMap::const_iterator itC = currentNearKeysDistances->find(it->first); + if ((itPP == prevPrevNearKeysDistances->end() || itPP->second > it->second + MARGIN) + && (itC == currentNearKeysDistances->end() || itC->second > it->second + MARGIN)) { + return true; + } + } + return false; +} + +// Calculating a point score that indicates usefulness of the point. +float ProximityInfoState::getPointScore( + const int x, const int y, const int time, const bool lastPoint, const float nearest, + const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const { + static const int DISTANCE_BASE_SCALE = 100; + static const int SAVE_DISTANCE_SCALE = 200; + static const int SKIP_DISTANCE_SCALE = 25; + static const int CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE = 40; + static const int STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE = 50; + static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 27; + static const float SAVE_DISTANCE_SCORE = 2.0f; + static const float SKIP_DISTANCE_SCORE = -1.0f; + static const float CHECK_LOCALMIN_DISTANCE_SCORE = -1.0f; + static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F / 36.0f; + static const float STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD = 0.5f; + static const float STRAIGHT_SKIP_SCORE = -1.0f; + static const float CORNER_ANGLE_THRESHOLD = M_PI_F / 2.0f; + static const float CORNER_SCORE = 1.0f; + + const std::size_t size = mInputXs.size(); + if (size <= 1) { + return 0.0f; + } + const int baseSampleRate = mProximityInfo->getMostCommonKeyWidth(); + const int distNext = getDistanceInt(x, y, mInputXs.back(), mInputYs.back()) + * DISTANCE_BASE_SCALE; + const int distPrev = getDistanceInt(mInputXs.back(), mInputYs.back(), + mInputXs[size - 2], mInputYs[size - 2]) * DISTANCE_BASE_SCALE; + float score = 0.0f; + + // Sum of distances + if (distPrev + distNext > baseSampleRate * SAVE_DISTANCE_SCALE) { + score += SAVE_DISTANCE_SCORE; + } + // Distance + if (distPrev < baseSampleRate * SKIP_DISTANCE_SCALE) { + score += SKIP_DISTANCE_SCORE; + } + // Location + if (distPrev < baseSampleRate * CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE) { + if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances, + prevPrevNearKeysDistances)) { + score += CHECK_LOCALMIN_DISTANCE_SCORE; + } + } + // Angle + const float angle1 = getAngle(x, y, mInputXs.back(), mInputYs.back()); + const float angle2 = getAngle(mInputXs.back(), mInputYs.back(), + mInputXs[size - 2], mInputYs[size - 2]); + const float angleDiff = getAngleDiff(angle1, angle2); + // Skip straight + if (nearest > STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD + && distPrev < baseSampleRate * STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE + && angleDiff < STRAIGHT_ANGLE_THRESHOLD) { + score += STRAIGHT_SKIP_SCORE; + } + // Save corner + if (distPrev > baseSampleRate * CORNER_CHECK_DISTANCE_THRESHOLD_SCALE + && angleDiff > CORNER_ANGLE_THRESHOLD) { + score += CORNER_SCORE; + } + return score; +} + +// Sampling touch point and pushing information to vectors. +// Returning if previous point is popped or not. +bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, + const int time, const bool sample, const bool isLastPoint, + NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) { + static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f; + + size_t size = mInputXs.size(); + bool popped = false; + if (nodeChar < 0 && sample) { + const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances); + const float score = getPointScore(x, y, time, isLastPoint, nearest, + currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances); + if (score < 0) { + // Pop previous point because it would be useless. + popInputData(); + size = mInputXs.size(); + popped = true; + } else { + popped = false; + } + // Check if the last point should be skipped. + if (isLastPoint) { + if (size > 0 && getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()) + < mProximityInfo->getMostCommonKeyWidth() * LAST_POINT_SKIP_DISTANCE_SCALE) { + if (DEBUG_GEO_FULL) { + AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %f, " + "width = %f", size, x, y, mInputXs.back(), mInputYs.back(), + getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()), + mProximityInfo->getMostCommonKeyWidth() + * LAST_POINT_SKIP_DISTANCE_SCALE); + } + return popped; + } else if (size > 1) { + int minChar = 0; + float minDist = mMaxPointToKeyLength; + for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin(); + it != currentNearKeysDistances->end(); ++it) { + if (minDist > it->second) { + minChar = it->first; + minDist = it->second; + } + } + NearKeysDistanceMap::const_iterator itPP = + prevNearKeysDistances->find(minChar); + if (DEBUG_GEO_FULL) { + AKLOGI("p1: char = %c, minDist = %f, prevNear key minDist = %f", + minChar, itPP->second, minDist); + } + if (itPP != prevNearKeysDistances->end() && minDist > itPP->second) { + return popped; + } + } + } + } + if (nodeChar >= 0 && (x < 0 || y < 0)) { - const int keyId = mProximityInfo->getKeyIndex(nodeChar); + const int keyId = mProximityInfo->getKeyIndexOf(nodeChar); if (keyId >= 0) { - x = mProximityInfo->getKeyCenterXOfIdG(keyId); - y = mProximityInfo->getKeyCenterYOfIdG(keyId); + x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); + y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId); } } + + // Pushing point information. + if (size > 0) { + mLengthCache.push_back( + mLengthCache.back() + getDistanceInt(x, y, mInputXs.back(), mInputYs.back())); + } else { + mLengthCache.push_back(0); + } mInputXs.push_back(x); mInputYs.push_back(y); mTimes.push_back(time); - return true; + mInputIndice.push_back(inputIndex); + if (DEBUG_GEO_FULL) { + AKLOGI("pushTouchPoint: x = %03d, y = %03d, time = %d, index = %d, popped ? %01d", + x, y, time, inputIndex, popped); + } + return popped; } float ProximityInfoState::calculateNormalizedSquaredDistance( @@ -198,28 +464,29 @@ float ProximityInfoState::calculateNormalizedSquaredDistance( } int ProximityInfoState::getDuration(const int index) const { - if (mInputSize > 0 && index > 0 && index < static_cast<int>(mInputSize) - 1) { + if (mInputSize > 0 && index > 0 && index < mInputSize - 1) { return mTimes[index + 1] - mTimes[index - 1]; } return 0; } -float ProximityInfoState::getPointToKeyLength(int inputIndex, int charCode, float scale) { - const int keyId = mProximityInfo->getKeyIndex(charCode); - if (keyId >= 0) { +float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint, + const float scale) const { + const int keyId = mProximityInfo->getKeyIndexOf(codePoint); + if (keyId != NOT_AN_INDEX) { const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; return min(mDistanceCache[index] * scale, mMaxPointToKeyLength); } - return 0; -} - -int ProximityInfoState::getKeyKeyDistance(int key0, int key1) { - return mProximityInfo->getKeyKeyDistanceG(key0, key1); + if (isSkippableChar(codePoint)) { + return 0; + } + // If the char is not a key on the keyboard then return the max length. + return MAX_POINT_TO_KEY_LENGTH; } -int ProximityInfoState::getSpaceY() { - const int keyId = mProximityInfo->getKeyIndex(' '); - return mProximityInfo->getKeyCenterYOfIdG(keyId); +int ProximityInfoState::getSpaceY() const { + const int keyId = mProximityInfo->getKeyIndexOf(' '); + return mProximityInfo->getKeyCenterYOfKeyIdG(keyId); } float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter( @@ -230,4 +497,46 @@ float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter( const float inputY = static_cast<float>(mInputYs[inputIndex]); return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY); } + +// Puts possible characters into filter and returns new filter size. +int32_t ProximityInfoState::getAllPossibleChars( + const size_t index, int32_t *const filter, const int32_t filterSize) const { + if (index >= mInputXs.size()) { + return filterSize; + } + int i = filterSize; + for (int j = 0; j < mProximityInfo->getKeyCount(); ++j) { + if (mNearKeysVector[index].test(j)) { + const int32_t keyCodePoint = mProximityInfo->getCodePointOf(j); + bool insert = true; + // TODO: Avoid linear search + for (int k = 0; k < filterSize; ++k) { + if (filter[k] == keyCodePoint) { + insert = false; + break; + } + } + if (insert) { + filter[i++] = keyCodePoint; + } + } + } + return i; +} + +float ProximityInfoState::getAveragePointDuration() const { + if (mInputSize == 0) { + return 0.0f; + } + return static_cast<float>(mTimes[mInputSize - 1] - mTimes[0]) / static_cast<float>(mInputSize); +} + +void ProximityInfoState::popInputData() { + mInputXs.pop_back(); + mInputYs.pop_back(); + mTimes.pop_back(); + mLengthCache.pop_back(); + mInputIndice.pop_back(); +} + } // namespace latinime diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 26fd89b36..48862a7c7 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -17,6 +17,7 @@ #ifndef LATINIME_PROXIMITY_INFO_STATE_H #define LATINIME_PROXIMITY_INFO_STATE_H +#include <bitset> #include <cstring> // for memset() #include <stdint.h> #include <string> @@ -24,6 +25,7 @@ #include "char_utils.h" #include "defines.h" +#include "hash_map_compat.h" namespace latinime { @@ -31,13 +33,11 @@ class ProximityInfo; class ProximityInfoState { public: - static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; - static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = - 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; - // The upper limit of the char code in mCodeToKeyIndex - static const int MAX_CHAR_CODE = 127; - static const float NOT_A_DISTANCE_FLOAT = -1.0f; - static const int NOT_A_CODE = -1; + typedef std::bitset<MAX_KEY_COUNT_IN_A_KEYBOARD> NearKeycodesSet; + static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; + static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; + static const float NOT_A_DISTANCE_FLOAT; + static const int NOT_A_CODE; ///////////////////////////////////////// // Defined in proximity_info_state.cpp // @@ -54,7 +54,8 @@ class ProximityInfoState { : mProximityInfo(0), mMaxPointToKeyLength(0), mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(), mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0), - mInputXs(), mInputYs(), mTimes(), mDistanceCache(), mLengthCache(), + mIsContinuationPossible(false), mInputXs(), mInputYs(), mTimes(), mInputIndice(), + mDistanceCache(), mLengthCache(), mNearKeysVector(), mTouchPositionCorrectionEnabled(false), mInputSize(0) { memset(mInputCodes, 0, sizeof(mInputCodes)); memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances)); @@ -196,26 +197,33 @@ class ProximityInfoState { return mInputSize; } - int getInputX(int index) const { + int getInputX(const int index) const { return mInputXs[index]; } - int getInputY(int index) const { + int getInputY(const int index) const { return mInputYs[index]; } - int getLengthCache(int index) const { + int getLengthCache(const int index) const { return mLengthCache[index]; } - float getPointToKeyLength(int inputIndex, int charCode, float scale); + bool isContinuationPossible() const { + return mIsContinuationPossible; + } + + float getPointToKeyLength(const int inputIndex, const int charCode, const float scale) const; - int getKeyKeyDistance(int key0, int key1); + int getSpaceY() const; - int getSpaceY(); + int32_t getAllPossibleChars( + const size_t startIndex, int32_t *const filter, const int32_t filterSize) const; + float getAveragePointDuration() const; private: DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); + typedef hash_map_compat<int, float> NearKeysDistanceMap; ///////////////////////////////////////// // Defined in proximity_info_state.cpp // ///////////////////////////////////////// @@ -224,7 +232,11 @@ class ProximityInfoState { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; - bool pushTouchPoint(const int nodeChar, int x, int y, const int time, const bool sample); + bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time, + const bool sample, const bool isLastPoint, + NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances); ///////////////////////////////////////// // Defined here // ///////////////////////////////////////// @@ -238,6 +250,20 @@ class ProximityInfoState { return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); } + float updateNearKeysDistances(const int x, const int y, + NearKeysDistanceMap *const currentNearKeysDistances); + bool isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; + float getPointScore( + const int x, const int y, const int time, const bool last, const float nearest, + const NearKeysDistanceMap *const currentNearKeysDistances, + const NearKeysDistanceMap *const prevNearKeysDistances, + const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; + bool checkAndReturnIsContinuationPossible(const int inputSize, const int *const xCoordinates, + const int *const yCoordinates, const int *const times); + void popInputData(); + // const const ProximityInfo *mProximityInfo; float mMaxPointToKeyLength; @@ -249,12 +275,15 @@ class ProximityInfoState { int mCellWidth; int mGridHeight; int mGridWidth; + bool mIsContinuationPossible; std::vector<int> mInputXs; std::vector<int> mInputYs; std::vector<int> mTimes; + std::vector<int> mInputIndice; std::vector<float> mDistanceCache; std::vector<int> mLengthCache; + std::vector<NearKeycodesSet> mNearKeysVector; bool mTouchPositionCorrectionEnabled; int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index 34ab8f0ef..53ae385ea 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -52,9 +52,9 @@ class TerminalAttributes { 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); unsigned int i; for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { - const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos); - if (NOT_A_CHARACTER == charCode) break; - outWord[i] = (uint16_t)charCode; + const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos); + if (NOT_A_CODE_POINT == codePoint) break; + outWord[i] = (uint16_t)codePoint; } *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; @@ -62,8 +62,8 @@ class TerminalAttributes { } }; - TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) : - mDict(dict), mFlags(flags), mStartPos(pos) { + TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) + : mDict(dict), mFlags(flags), mStartPos(pos) { } inline ShortcutIterator getShortcutIterator() const { @@ -72,6 +72,10 @@ class TerminalAttributes { return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); } + bool isBlacklistedOrNotAWord() const { + return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD); + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); const uint8_t *const mDict; diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index ba3c2db6b..49d044fbc 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -58,12 +58,12 @@ UnigramDictionary::~UnigramDictionary() { } static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) { - return sizeof(*codes) * codesSize; + return static_cast<unsigned int>(sizeof(*codes)) * codesSize; } // TODO: This needs to take a const unsigned short* and not tinker with its contents -static inline void addWord( - unsigned short *word, int length, int frequency, WordsPriorityQueue *queue, int type) { +static inline void addWord(unsigned short *word, int length, int frequency, + WordsPriorityQueue *queue, int type) { queue->push(frequency, word, length, type); } @@ -106,7 +106,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, const unsigned int digraphsSize) const { - const int startIndex = codesDest - codesBuffer; + const int startIndex = static_cast<int>(codesDest - codesBuffer); if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i]; @@ -170,8 +170,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit // bigramMap contains the association <bigram address> -> <bigram frequency> // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter // in bigram_dictionary.cpp -int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, - const int *xcoordinates, +int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords, int *frequencies, @@ -226,7 +225,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, short unsigned int *w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; - (void)s; + (void)s; // To suppress compiler warning AKLOGI("%s %i", s, frequencies[j]); } } @@ -315,7 +314,6 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int correction->initCorrection(proximityInfo, inputSize, maxDepth); } -static const char QUOTE = '\''; static const char SPACE = ' '; void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, @@ -391,9 +389,11 @@ inline void UnigramDictionary::onTerminal(const int probability, const int finalProbability = correction->getFinalProbability(probability, &wordPointer, &wordLength); - if (0 != finalProbability) { + if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) { // If the probability is 0, we don't want to add this word. However we still // want to add its shortcuts (including a possible whitelist entry) if any. + // Furthermore, if this is not a word (shortcut only for example) or a blacklisted + // entry then we never want to suggest this. addWord(wordPointer, wordLength, finalProbability, masterQueue, Dictionary::KIND_CORRECTION); } @@ -450,7 +450,7 @@ int UnigramDictionary::getSubStringSuggestion( const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int*wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { + int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_ABORT; } @@ -545,9 +545,9 @@ int UnigramDictionary::getSubStringSuggestion( freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } if (DEBUG_DICT) { - AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)" - , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos, - wordLengthArray[0]); + AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)", + currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos, + (currentWordIndex > 0) ? wordLengthArray[0] : 0); } if (freq <= 0 || nextWordLength <= 0 || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) { @@ -595,11 +595,10 @@ int UnigramDictionary::getSubStringSuggestion( void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputSize, - Correction *correction, WordsPriorityQueuePool *queuePool, - const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, - const int outputWordLength, int *freqArray, int *wordLengthArray, - unsigned short *outputWord) const { + const bool useFullEditDistance, const int inputSize, Correction *correction, + WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, + const int startInputPos, const int startWordIndex, const int outputWordLength, + int *freqArray, int *wordLengthArray, unsigned short *outputWord) const { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; @@ -639,7 +638,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, // Missing space inputWordStartPos = i; inputWordLength = inputSize - i; - if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, + if (getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate, startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, 0) @@ -722,13 +721,13 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, // In and out parameters may point to the same location. This function takes care // not to use any input parameters after it wrote into its outputs. static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, - const uint8_t *const root, const int startPos, - const uint16_t *const inWord, const int startInputIndex, - int32_t *outNewWord, int *outInputIndex, int *outPos) { + const uint8_t *const root, const int startPos, const uint16_t *const inWord, + const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex, + int *outPos) { const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; - int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); - int32_t baseChar = toBaseLowerCase(character); + int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + int32_t baseChar = toBaseLowerCase(codePoint); const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); if (baseChar != wChar) { @@ -737,18 +736,18 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, return false; } int inputIndex = startInputIndex; - outNewWord[inputIndex] = character; + outNewWord[inputIndex] = codePoint; if (hasMultipleChars) { - character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); - while (NOT_A_CHARACTER != character) { - baseChar = toBaseLowerCase(character); - if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) { + codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + while (NOT_A_CODE_POINT != codePoint) { + baseChar = toBaseLowerCase(codePoint); + if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) { *outPos = BinaryFormat::skipOtherCharacters(root, pos); *outInputIndex = startInputIndex; return false; } - outNewWord[inputIndex] = character; - character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); + outNewWord[inputIndex] = codePoint; + codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); } } *outInputIndex = inputIndex + 1; @@ -763,8 +762,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, short unsigned int *outWord, int *maxFreq) { if (freq > *maxFreq) { - for (int q = 0; q < length; ++q) + for (int q = 0; q < length; ++q) { outWord[q] = newWord[q]; + } outWord[length] = 0; *maxFreq = freq; } @@ -773,7 +773,7 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in // Will find the highest frequency of the words like the one passed as an argument, // that is, everything that only differs by case/accents. int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, - const int length, short unsigned int *outWord) const { + const int inputSize, short unsigned int *outWord) const { int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; int depth = 0; int maxFreq = -1; @@ -793,12 +793,12 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord int inputIndex = stackInputIndex[depth]; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); // Test whether all chars in this group match with the word we are searching for. If so, - // we want to traverse its children (or if the length match, evaluate its frequency). + // we want to traverse its children (or if the inputSize match, evaluate its frequency). // Note that this function will output the position regardless, but will only write // into inputIndex if there is a match. const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord, - inputIndex, newWord, &inputIndex, &pos); - if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) { + inputIndex, inputSize, newWord, &inputIndex, &pos); + if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) { const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq); } @@ -807,8 +807,8 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos); // If we had a match and the word has children, we want to traverse them. We don't have // to traverse words longer than the one we are searching for, since they will not match - // anyway, so don't traverse unless inputIndex < length. - if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) { + // anyway, so don't traverse unless inputIndex < inputSize. + if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) { // Save position for this depth, to get back to this once children are done stackChildCount[depth] = charGroupIndex; stackSiblingPos[depth] = siblingPos; @@ -841,11 +841,17 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt return NOT_A_PROBABILITY; } const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { + // If this is not a word, or if it's a blacklisted entry, it should behave as + // having no frequency outside of the suggestion process (where it should be used + // for shortcuts). + return NOT_A_PROBABILITY; + } const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); if (hasMultipleChars) { pos = BinaryFormat::skipOtherCharacters(root, pos); } else { - BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos); + BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); } const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); return unigramFreq; @@ -899,23 +905,23 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // else if FLAG_IS_TERMINAL: the frequency // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address // Note that you can't have a node that both is not a terminal and has no children. - int32_t c = BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos); - assert(NOT_A_CHARACTER != c); + int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); + assert(NOT_A_CODE_POINT != c); // We are going to loop through each character and make it look like it's a different // node each time. To do that, we will process characters in this node in order until - // we find the character terminator. This is signalled by getCharCode* returning - // NOT_A_CHARACTER. + // we find the character terminator. This is signalled by getCodePoint* returning + // NOT_A_CODE_POINT. // As a special case, if there is only one character in this node, we must not read the - // next bytes so we will simulate the NOT_A_CHARACTER return by testing the flags. + // next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags. // This way, each loop run will look like a "virtual node". do { // We prefetch the next char. If 'c' is the last char of this node, we will have - // NOT_A_CHARACTER in the next char. From this we can decide whether this virtual node + // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node // should behave as a terminal or not and whether we have children. const int32_t nextc = hasMultipleChars - ? BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CHARACTER; - const bool isLastChar = (NOT_A_CHARACTER == nextc); + ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT; + const bool isLastChar = (NOT_A_CODE_POINT == nextc); // If there are more chars in this nodes, then this virtual node is not a terminal. // If we are on the last char, this virtual node is a terminal if this node is. const bool isTerminal = isLastChar && isTerminalNode; @@ -944,9 +950,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // Prepare for the next character. Promote the prefetched char to current char - the loop // will take care of prefetching the next. If we finally found our last char, nextc will - // contain NOT_A_CHARACTER. + // contain NOT_A_CODE_POINT. c = nextc; - } while (NOT_A_CHARACTER != c); + } while (NOT_A_CODE_POINT != c); if (isTerminalNode) { // The frequency should be here, because we come here only if this is actually diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index 2c6622210..57129bb07 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -43,11 +43,11 @@ class UnigramDictionary { int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); int getFrequency(const int32_t *const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions( - ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, - const int *codes, const int codesSize, const std::map<int, int> *bigramMap, - const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *outputTypes) const; + int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int codesSize, + const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, + const bool useFullEditDistance, unsigned short *outWords, int *frequencies, + int *outputTypes) const; virtual ~UnigramDictionary(); private: @@ -94,7 +94,7 @@ class UnigramDictionary { const int currentWordIndex) const; int getMostFrequentWordLike(const int startInputIndex, const int inputSize, Correction *correction, unsigned short *word) const; - int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length, + int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize, short unsigned int *outWord) const; int getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h index c5de9797f..2d52903e0 100644 --- a/native/jni/src/words_priority_queue_pool.h +++ b/native/jni/src/words_priority_queue_pool.h @@ -30,7 +30,7 @@ class WordsPriorityQueuePool { mainQueueMaxWords, maxWordLength)) { for (int i = 0, subQueueBufOffset = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; - ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) { + ++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) { mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset) WordsPriorityQueue(subQueueMaxWords, maxWordLength); } |