diff options
Diffstat (limited to 'native/jni/src/defines.h')
-rw-r--r-- | native/jni/src/defines.h | 126 |
1 files changed, 53 insertions, 73 deletions
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))

/**
 * Encodes an array of code points into a 0-terminated UTF-8 byte string.
 *
 * Conversion stops at the first 0 code point, after sourceSize code points, or as soon as the
 * next code point would not fit entirely in dest; a multi-byte sequence is never cut in half.
 * Negative values (such as a -1 "not a code point" sentinel) are skipped without output.
 * Code points above 0x1FFFFF are written in the legacy five- and six-byte forms, as before.
 *
 * @param source     code points to encode; may be null, which yields an empty string
 * @param sourceSize maximum number of elements to read from source
 * @param dest       output buffer; left untouched when null or when destSize <= 0
 * @param destSize   capacity of dest in bytes, including room for the terminating 0
 * @return number of bytes written to dest, not counting the terminating 0
 */
AK_FORCE_INLINE static int intArrayToCharArray(const int *const source, const int sourceSize,
        char *dest, const int destSize) {
    if (!dest || destSize <= 0) {
        // There is no room even for the terminator, so dest must not be written at all.
        return 0;
    }
    // Bytes are stored through an unsigned view so that values >= 0x80 are well defined.
    unsigned char *const out = (unsigned char *)dest;
    // We want to always terminate with a 0 char, so stop one short of the length to make
    // sure there is room.
    const int destLimit = destSize - 1;
    int si = 0;
    int di = 0;
    while (source && si < sourceSize && di < destLimit && 0 != source[si]) {
        const int codePoint = source[si++];
        if (codePoint < 0) {
            // Not a code point... skip.
            continue;
        }
        // Pick the sequence length and the marker bits of the lead byte. The upper bounds are
        // inclusive: 0x7F, 0x7FF and 0xFFFF are the largest values of their respective lengths.
        int byteCount;
        int leadBits;
        if (codePoint <= 0x7F) { // One byte
            byteCount = 1;
            leadBits = 0x00;
        } else if (codePoint <= 0x7FF) { // Two bytes
            byteCount = 2;
            leadBits = 0xC0;
        } else if (codePoint <= 0xFFFF) { // Three bytes
            byteCount = 3;
            leadBits = 0xE0;
        } else if (codePoint <= 0x1FFFFF) { // Four bytes
            byteCount = 4;
            leadBits = 0xF0;
        } else if (codePoint <= 0x3FFFFFF) { // Five bytes
            byteCount = 5;
            leadBits = 0xF8;
        } else { // Six bytes
            byteCount = 6;
            leadBits = 0xFC;
        }
        if (di + byteCount > destLimit) {
            // The whole sequence does not fit; stop rather than emit a truncated character.
            break;
        }
        int shift = 6 * (byteCount - 1);
        out[di++] = (unsigned char)(leadBits | (codePoint >> shift));
        // Every trailing byte carries six payload bits and must have the form 10xxxxxx.
        while (shift > 0) {
            shift -= 6;
            out[di++] = (unsigned char)(0x80 | ((codePoint >> shift) & 0x3F));
        }
    }
    out[di] = 0;
    return di;
}
__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) +#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) + +#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) +#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) +#define INTS_TO_CHARS(input, length, output, outlength) do { \ + intArrayToCharArray(input, length, output, outlength); } while (0) + static inline void dumpWordInfo(const int *word, const int length, const int rank, const int probability) { static char charBuf[50]; - const int N = intArrayToCharArray(word, length, charBuf); + const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf)); if (N > 1) { AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability); } @@ -90,7 +118,7 @@ static inline void dumpResult(const int *outWords, const int *frequencies) { static AK_FORCE_INLINE void dumpWord(const int *word, const int length) { static char charBuf[50]; - const int N = intArrayToCharArray(word, length, charBuf); + const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf)); if (N > 1) { AKLOGI("[ %s ]", charBuf); } @@ -264,37 +292,24 @@ static inline void prof_out(void) { // of the binary dictionary where a {key,value} string pair scheme is used. 
#define LARGEST_INT_DIGIT_COUNT 11 -#define NOT_VALID_WORD (-99) #define NOT_A_CODE_POINT (-1) #define NOT_A_DISTANCE (-1) #define NOT_A_COORDINATE (-1) -#define MATCH_CHAR_WITHOUT_DISTANCE_INFO (-2) -#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3) -#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4) #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) +#define NOT_A_DICT_POS (S_INT_MIN) #define KEYCODE_SPACE ' ' #define KEYCODE_SINGLE_QUOTE '\'' #define KEYCODE_HYPHEN_MINUS '-' -#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true -#define SUGGEST_MULTIPLE_WORDS true #define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f - -#define ZERO_DISTANCE_PROMOTION_RATE 110.0f -#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f -#define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_PROBABILITY 255 #define MAX_BIGRAM_ENCODED_PROBABILITY 15 -#define MULTIPLE_WORDS_DEMOTION_RATE 80 // Assuming locale strings such as en_US, sr-Latn etc. #define MAX_LOCALE_STRING_LENGTH 10 -/* heuristic... This should be changed if we change the unit of the probability. */ -#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_PROBABILITY * 58 / 100) - // Max value for length, distance and probability which are used in weighting // TODO: Remove #define MAX_VALUE_FOR_WEIGHTING 10000000 @@ -306,45 +321,9 @@ static inline void prof_out(void) { #define MAX_POINTER_COUNT 1 #define MAX_POINTER_COUNT_G 2 -// Queue IDs and size for DicNodesCache -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE 0 -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE 1 -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL 2 -#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION 3 -#define DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE 4 - -// Size, in bytes, of the bloom filter index for bigrams -// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, -// where k is the number of hash functions, n the number of bigrams, and m the number of -// bits we can test. 
-// At the moment 100 is the maximum number of bigrams for a word with the current -// dictionaries, so n = 100. 1024 buckets give us m = 1024. -// With 1 hash function, our false positive rate is about 9.3%, which should be enough for -// our uses since we are only using this to increase average performance. For the record, -// k = 2 gives 3.1% and k = 3 gives 1.6%. With k = 1, making m = 2048 gives 4.8%, -// and m = 4096 gives 2.4%. -#define BIGRAM_FILTER_BYTE_SIZE 128 -// Must be smaller than BIGRAM_FILTER_BYTE_SIZE * 8, and preferably prime. 1021 is the largest -// prime under 128 * 8. -#define BIGRAM_FILTER_MODULO 1021 -#if BIGRAM_FILTER_BYTE_SIZE * 8 < BIGRAM_FILTER_MODULO -#error "BIGRAM_FILTER_MODULO is larger than BIGRAM_FILTER_BYTE_SIZE" -#endif - -// Max number of bigram maps (previous word contexts) to be cached. Increasing this number could -// improve bigram lookup speed for multi-word suggestions, but at the cost of more memory usage. -// Also, there are diminishing returns since the most frequently used bigrams are typically near -// the beginning of the input and are thus the first ones to be cached. Note that these bigrams -// are reset for each new composing word. -#define MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP 25 -// Most common previous word contexts currently have 100 bigrams -#define DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP 100 - template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; } template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; } -#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) - // DEBUG #define INPUTLENGTH_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) @@ -394,6 +373,7 @@ typedef enum { CT_TRANSPOSITION, CT_COMPLETION, CT_TERMINAL, + CT_TERMINAL_INSERTION, // Create new word with space omission CT_NEW_WORD_SPACE_OMITTION, // Create new word with space substitution |