diff options
Diffstat (limited to 'native/jni/src/defines.h')
-rw-r--r-- | native/jni/src/defines.h | 259 |
1 files changed, 131 insertions, 128 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index e06ee42b0..0aedc287f 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -1,11 +1,11 @@ /* - * Copyright (C) 2010, The Android Open Source Project + * Copyright (C) 2010 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,99 +17,91 @@ #ifndef LATINIME_DEFINES_H #define LATINIME_DEFINES_H -#include <stdint.h> +#ifdef __GNUC__ +#define AK_FORCE_INLINE __attribute__((always_inline)) __inline__ +#else // __GNUC__ +#define AK_FORCE_INLINE inline +#endif // __GNUC__ + +#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) +#undef AK_FORCE_INLINE +#define AK_FORCE_INLINE inline +#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) + +// Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java +#define MAX_WORD_LENGTH 48 +// Must be equal to BinaryDictionary.MAX_RESULTS in Java +#define MAX_RESULTS 18 +// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java +#define MAX_PROXIMITY_CHARS_SIZE 16 +#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #include <android/log.h> #ifndef LOG_TAG #define LOG_TAG "LatinIME: " -#endif +#endif // LOG_TAG #define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) #define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) -#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ - dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) +#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) -#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) -// TODO: INTS_TO_CHARS -#define SHORTS_TO_CHARS(input, length, output) do { \ - shortArrayToCharArray(input, length, output); } while (0) - -static inline void dumpWordInfo(const unsigned short *word, const int length, - const int rank, const int frequency) { - static char charBuf[50]; - int i = 0; - for (; i < length; ++i) { - const unsigned short c = word[i]; - if (c == 0) { - break; +#define INTS_TO_CHARS(input, length, output) do { \ + intArrayToCharArray(input, length, output); } while (0) + +// TODO: Support full UTF-8 conversion +AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize, + char *dest) { + int si = 0; + int di = 0; + while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) { + const int codePoint = source[si++]; + if (codePoint < 0x7F) { + dest[di++] = codePoint; + } else if (codePoint < 0x7FF) { + dest[di++] = 0xC0 + (codePoint >> 6); + dest[di++] = 0x80 + (codePoint & 0x3F); + } else if (codePoint < 0xFFFF) { + dest[di++] = 0xE0 + (codePoint >> 12); + dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6); + dest[di++] = 0x80 + (codePoint & 0x3F); } - // static_cast only for debugging - charBuf[i] = static_cast<char>(c); } - charBuf[i] = 0; - if (i > 1) { + dest[di] = 0; + return di; +} + +static inline void dumpWordInfo(const int *word, const int length, const int rank, + const int frequency) { + static char charBuf[50]; + const int N = intArrayToCharArray(word, length, charBuf); + if (N > 1) { AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency); } } -static inline void dumpResult( - const unsigned short *outWords, const int *frequencies, const int maxWordCounts, - const int maxWordLength) { +static inline void dumpResult(const int *outWords, const int *frequencies) { AKLOGI("--- DUMP RESULT ---------"); - for (int i = 0; i < maxWordCounts; ++i) { - dumpWordInfo(&outWords[i * maxWordLength], maxWordLength, i, frequencies[i]); + for (int i = 0; i < MAX_RESULTS; ++i) { + dumpWordInfo(&outWords[i * MAX_WORD_LENGTH], MAX_WORD_LENGTH, i, frequencies[i]); } AKLOGI("-------------------------"); } -static inline void dumpWord(const unsigned short *word, const int length) { +static AK_FORCE_INLINE void dumpWord(const int *word, const int length) { static char charBuf[50]; - int i = 0; - for (; i < length; ++i) { - const unsigned short c = word[i]; - if (c == 0) { - break; - } - // static_cast only for debugging - charBuf[i] = static_cast<char>(c); - } - charBuf[i] = 0; - if (i > 1) { + const int N = intArrayToCharArray(word, length, charBuf); + if (N > 1) { AKLOGI("[ %s ]", charBuf); } } -static inline void dumpWordInt(const int *word, const int length) { - static char charBuf[50]; - - for (int i = 0; i < length; ++i) { - charBuf[i] = word[i]; - } - charBuf[length] = 0; - AKLOGI("i[ %s ]", charBuf); -} - -// TODO: Change this to intArrayToCharArray -static inline void shortArrayToCharArray( - const unsigned short *input, const int length, char *output) { - int i = 0; - for (;i < length; ++i) { - const unsigned short c = input[i]; - if (c == 0) { - break; - } - // static_cast only for debugging - output[i] = static_cast<char>(c); - } - output[i] = 0; -} - #ifndef __ANDROID__ #include <cassert> #include <execinfo.h> #include <stdlib.h> +#define DO_ASSERT_TEST #define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0) #define SHOW_STACK_TRACE do { showStackTrace(); } while (0) @@ -126,23 +118,23 @@ static inline void showStackTrace() { } free(strs); } -#else +#else // __ANDROID__ #include <cassert> +#define DO_ASSERT_TEST #define ASSERT(success) assert(success) #define SHOW_STACK_TRACE -#endif +#endif // __ANDROID__ -#else +#else // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) -#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) +#define DUMP_RESULT(words, frequencies) #define DUMP_WORD(word, length) -#define DUMP_WORD_INT(word, length) +#undef DO_ASSERT_TEST #define ASSERT(success) #define SHOW_STACK_TRACE -// TODO: INTS_TO_CHARS -#define SHORTS_TO_CHARS(input, length, output) -#endif +#define INTS_TO_CHARS(input, length, output) +#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #ifdef FLAG_DO_PROFILE // Profiler @@ -178,15 +170,15 @@ static inline void prof_out(void) { } AKLOGI("Total time is %6.3f ms.", profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC)); - float all = 0; + float all = 0.0f; for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { all += profile_buf[i]; } - if (all == 0) all = 1; + if (all < 1.0f) all = 1.0f; for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) { - if (profile_buf[i]) { + if (profile_buf[i] > 0.0f) { AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.", - i, (profile_buf[i] * 100 / all), + i, (profile_buf[i] * 100.0f / all), profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC), profile_counter[i]); } @@ -219,6 +211,11 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_SAMPLING_POINTS false +#define DEBUG_POINTS_PROBABILITY false +#define DEBUG_DOUBLE_LETTER false +#define DEBUG_CACHE false +#define DEBUG_DUMP_ERROR false #ifdef FLAG_FULL_DBG #define DEBUG_GEO_FULL true @@ -239,14 +236,16 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_SAMPLING_POINTS false +#define DEBUG_POINTS_PROBABILITY false +#define DEBUG_DOUBLE_LETTER false +#define DEBUG_CACHE false +#define DEBUG_DUMP_ERROR false #define DEBUG_GEO_FULL false #endif // FLAG_DBG -#ifndef U_SHORT_MAX -#define U_SHORT_MAX 65535 // ((1 << 16) - 1) -#endif #ifndef S_INT_MAX #define S_INT_MAX 2147483647 // ((1 << 31) - 1) #endif @@ -257,44 +256,38 @@ static inline void prof_out(void) { #define S_INT_MIN (-2147483647 - 1) // -(1 << 31) #endif +#define M_PI_F 3.14159265f +#define MAX_PERCENTILE 100 + +// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator. +// As such, this is the maximum number of characters will be needed to represent an int as a +// string, including the terminator; this is used as the size of a string buffer large enough to +// hold any value that is intended to fit in an integer, e.g. in the code that reads the header +// of the binary dictionary where a {key,value} string pair scheme is used. +#define LARGEST_INT_DIGIT_COUNT 11 + // Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap(). // We measured and compared performance of both, and found mmap() is fairly good in terms of // loading time, and acceptable even for several initial lookups which involve page faults. #define USE_MMAP_FOR_DICTIONARY -// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words -#define ADDRESS_MASK 0x3FFFFF - -// The bit that decides if an address follows in the next 22 bits -#define FLAG_ADDRESS_MASK 0x40 -// The bit that decides if this is a terminal node for a word. The node could still have children, -// if the word has other endings. -#define FLAG_TERMINAL_MASK 0x80 - -#define FLAG_BIGRAM_READ 0x80 -#define FLAG_BIGRAM_CHILDEXIST 0x40 -#define FLAG_BIGRAM_CONTINUED 0x80 -#define FLAG_BIGRAM_FREQ 0x7F - -#define DICTIONARY_VERSION_MIN 200 #define NOT_VALID_WORD (-99) #define NOT_A_CODE_POINT (-1) #define NOT_A_DISTANCE (-1) #define NOT_A_COORDINATE (-1) -#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2) +#define MATCH_CHAR_WITHOUT_DISTANCE_INFO (-2) #define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3) #define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4) #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define KEYCODE_SPACE ' ' +#define KEYCODE_SINGLE_QUOTE '\'' +#define KEYCODE_HYPHEN_MINUS '-' #define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true - -#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true -#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true -#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true #define SUGGEST_MULTIPLE_WORDS true +#define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 @@ -316,22 +309,12 @@ static inline void prof_out(void) { #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 #define TWO_WORDS_CORRECTION_DEMOTION_BASE 80 #define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1 -#define ZERO_DISTANCE_PROMOTION_RATE 110 +#define ZERO_DISTANCE_PROMOTION_RATE 110.0f #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_FREQ 255 #define MAX_BIGRAM_FREQ 15 -// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java -// This is only used for the size of array. Not to be used in c functions. -#define MAX_WORD_LENGTH_INTERNAL 48 - -// This must be the same as ProximityInfo#MAX_PROXIMITY_CHARS_SIZE, currently it's 16. -#define MAX_PROXIMITY_CHARS_SIZE_INTERNAL 16 - -// This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java -#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 - // Assuming locale strings such as en_US, sr-Latn etc. #define MAX_LOCALE_STRING_LENGTH 10 @@ -350,19 +333,17 @@ static inline void prof_out(void) { #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 -#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35 -#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185 +#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f +#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f /* heuristic... This should be changed if we change the unit of the frequency. */ #define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100) #define MAX_DEPTH_MULTIPLIER 3 - #define FIRST_WORD_INDEX 0 -#define MAX_SPACES_INTERNAL 16 - -// Max Distance between point to key -#define MAX_POINT_TO_KEY_LENGTH 10000000 +// Max value for length, distance and probability which are used in weighting +// TODO: Remove +#define MAX_VALUE_FOR_WEIGHTING 10000000 // The max number of the keys in one keyboard layout #define MAX_KEY_COUNT_IN_A_KEYBOARD 64 @@ -372,10 +353,10 @@ static inline void prof_out(void) { #define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5 #define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3 -#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3 // TODO: Remove -#define MAX_POINTER_COUNT_FOR_G 2 +#define MAX_POINTER_COUNT 1 +#define MAX_POINTER_COUNT_G 2 // Size, in bytes, of the bloom filter index for bigrams // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, @@ -395,11 +376,10 @@ static inline void prof_out(void) { #error "BIGRAM_FILTER_MODULO is larger than BIGRAM_FILTER_BYTE_SIZE" #endif -template<typename T> inline T min(T a, T b) { return a < b ? a : b; } -template<typename T> inline T max(T a, T b) { return a > b ? a : b; } +template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; } +template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; } -// The ratio of neutral area radius to sweet spot radius. -#define NEUTRAL_AREA_RADIUS_RATIO 1.3f +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) // DEBUG #define INPUTLENGTH_FOR_DEBUG (-1) @@ -416,12 +396,35 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; } // Used as a return value for character comparison typedef enum { // Same char, possibly with different case or accent - EQUIVALENT_CHAR, + MATCH_CHAR, // It is a char located nearby on the keyboard - NEAR_PROXIMITY_CHAR, + PROXIMITY_CHAR, + // Additional proximity char which can differ by language. + ADDITIONAL_PROXIMITY_CHAR, + // It is a substitution char + SUBSTITUTION_CHAR, // It is an unrelated char UNRELATED_CHAR, - // Additional proximity char which can differ by language. - ADDITIONAL_PROXIMITY_CHAR } ProximityType; + +typedef enum { + NOT_A_DOUBLE_LETTER, + A_DOUBLE_LETTER, + A_STRONG_DOUBLE_LETTER +} DoubleLetterLevel; + +typedef enum { + CT_MATCH, + CT_PROXIMITY, + CT_ADDITIONAL_PROXIMITY, + CT_SUBSTITUTION, + CT_OMISSION, + CT_INSERTION, + CT_TRANSPOSITION, + CT_SPACE_SUBSTITUTION, + CT_SPACE_OMISSION, + CT_COMPLETION, + CT_TERMINAL, + CT_NEW_WORD, +} CorrectionType; #endif // LATINIME_DEFINES_H |