diff options
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java | 8 | ||||
-rw-r--r-- | native/jni/src/bigram_dictionary.h | 4 | ||||
-rw-r--r-- | native/jni/src/binary_format.h | 15 | ||||
-rw-r--r-- | native/jni/src/char_utils.cpp | 2 | ||||
-rw-r--r-- | native/jni/src/char_utils.h | 12 | ||||
-rw-r--r-- | tools/maketext/Android.mk | 1 | ||||
-rw-r--r-- | tools/maketext/etc/Android.mk | 3 |
7 files changed, 22 insertions, 23 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 273ee329d..7f042335a 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -783,10 +783,10 @@ public class BinaryDictInputOutput { // their lower bound and exclude their higher bound so we need to have the first step // start at exactly 1 unit higher than floor(unigramFreq + half a step). // Note : to reconstruct the score, the dictionary reader will need to divide - // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise, and add - // (discretizedFrequency + 0.5) times this value to get the median value of the step, - // which is the best approximation. This is how we get the most precise result with - // only four bits. + // MAX_TERMINAL_FREQUENCY - unigramFreq by 16.5 likewise to get the value of the step, + // and add (discretizedFrequency + 0.5 + 0.5) times this value to get the best + // approximation. (0.5 to get the first step start, and 0.5 to get the middle of the + // step pointed to by the discretized frequency.)
final float stepSize = (MAX_TERMINAL_FREQUENCY - unigramFrequency) / (1.5f + MAX_BIGRAM_FREQUENCY); final float firstStepStart = 1 + unigramFrequency + (stepSize / 2.0f); diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index d676cca63..5f11ae822 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -29,8 +29,6 @@ class BigramDictionary { BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, unsigned short *outWords, int *frequencies, int *outputTypes) const; - int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, - const bool forceLowerCaseSearch) const; void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const; bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; @@ -45,6 +43,8 @@ class BigramDictionary { bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; + int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, + const bool forceLowerCaseSearch) const; const unsigned char *DICT; const int MAX_WORD_LENGTH; diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 4cabc8404..d8f3e83dd 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -61,13 +61,6 @@ class BinaryFormat { static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); - const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; - const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; - const static 
int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; - - public: const static int UNKNOWN_FORMAT = -1; // Originally, format version 1 had a 16-bit magic number, then the version number `01' // then options that must be 0. Hence the first 32-bits of the format are always as follow @@ -94,7 +87,6 @@ class BinaryFormat { static int skipFrequency(const uint8_t flags, const int pos); static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos); static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); - static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); @@ -118,6 +110,13 @@ class BinaryFormat { REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4 }; const static unsigned int NO_FLAGS = 0; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); + const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; + const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; + const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; + static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); }; inline int BinaryFormat::detectFormat(const uint8_t *const dict) { diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp index 223291f60..9d886da31 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/char_utils.cpp @@ -889,7 +889,7 @@ static int compare_pair_capital(const void *a, const void *b) { - static_cast<int>((static_cast<const struct LatinCapitalSmallPair *>(b))->capital); } -unsigned short latin_tolower(unsigned short c) { +unsigned short latin_tolower(const unsigned short c) { struct LatinCapitalSmallPair *p = static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP, sizeof(SORTED_CHAR_MAP) / 
sizeof(SORTED_CHAR_MAP[0]), diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index edd96bbb0..b30677fa7 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -17,21 +17,23 @@ #ifndef LATINIME_CHAR_UTILS_H #define LATINIME_CHAR_UTILS_H +#include <cctype> + namespace latinime { -inline static int isAsciiUpper(unsigned short c) { - return c >= 'A' && c <= 'Z'; +inline static bool isAsciiUpper(unsigned short c) { + return isupper(static_cast<int>(c)) != 0; } inline static unsigned short toAsciiLower(unsigned short c) { return c - 'A' + 'a'; } -inline static int isAscii(unsigned short c) { - return c <= 127; +inline static bool isAscii(unsigned short c) { + return isascii(static_cast<int>(c)) != 0; } -unsigned short latin_tolower(unsigned short c); +unsigned short latin_tolower(const unsigned short c); /** * Table mapping most combined Latin, Greek, and Cyrillic characters diff --git a/tools/maketext/Android.mk b/tools/maketext/Android.mk index 98731b718..77914cae6 100644 --- a/tools/maketext/Android.mk +++ b/tools/maketext/Android.mk @@ -19,7 +19,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES += $(call all-java-files-under,src) LOCAL_JAR_MANIFEST := etc/manifest.txt LOCAL_JAVA_RESOURCE_DIRS := res -LOCAL_MODULE_TAGS := eng LOCAL_MODULE := maketext include $(BUILD_HOST_JAVA_LIBRARY) diff --git a/tools/maketext/etc/Android.mk b/tools/maketext/etc/Android.mk index 4fa194bcd..475676b3a 100644 --- a/tools/maketext/etc/Android.mk +++ b/tools/maketext/etc/Android.mk @@ -15,7 +15,6 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -LOCAL_MODULE_TAGS := eng - LOCAL_PREBUILT_EXECUTABLES := maketext + include $(BUILD_HOST_PREBUILT) |