diff options
Diffstat (limited to 'native')
-rw-r--r-- | native/jni/Android.mk | 28 | ||||
-rw-r--r-- | native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 6 | ||||
-rw-r--r-- | native/jni/src/bigram_dictionary.cpp | 14 | ||||
-rw-r--r-- | native/jni/src/binary_format.h | 27 | ||||
-rw-r--r-- | native/jni/src/correction.cpp | 1 | ||||
-rw-r--r-- | native/jni/src/dictionary.cpp | 4 | ||||
-rw-r--r-- | native/jni/src/dictionary.h | 2 | ||||
-rw-r--r-- | native/jni/src/unigram_dictionary.cpp | 18 | ||||
-rw-r--r-- | native/jni/src/unigram_dictionary.h | 2 |
9 files changed, 52 insertions, 50 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 5e0d3518d..d53757fd4 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -20,24 +20,6 @@ LOCAL_PATH := $(call my-dir) #FLAG_DBG := true #FLAG_DO_PROFILE := true -TARGETING_UNBUNDLED_FROYO := true - -ifeq ($(TARGET_ARCH), x86) - TARGETING_UNBUNDLED_FROYO := false -endif - -ifeq ($(TARGET_ARCH), mips) - TARGETING_UNBUNDLED_FROYO := false -endif - -ifeq ($(FLAG_DBG), true) - TARGETING_UNBUNDLED_FROYO := false -endif - -ifeq ($(FLAG_DO_PROFILE), true) - TARGETING_UNBUNDLED_FROYO := false -endif - ###################################### include $(CLEAR_VARS) @@ -69,11 +51,6 @@ LOCAL_SRC_FILES := \ $(LATIN_IME_JNI_SRC_FILES) \ $(addprefix $(LATIN_IME_SRC_DIR)/,$(LATIN_IME_CORE_SRC_FILES)) -ifeq ($(TARGETING_UNBUNDLED_FROYO), true) - LOCAL_NDK_VERSION := 4 - LOCAL_SDK_VERSION := 8 -endif - ifeq ($(FLAG_DO_PROFILE), true) $(warning Making profiling version of native library) LOCAL_CFLAGS += -DFLAG_DO_PROFILE @@ -117,11 +94,6 @@ ifeq ($(FLAG_DBG), true) endif # FLAG_DBG endif # FLAG_DO_PROFILE -ifeq ($(TARGETING_UNBUNDLED_FROYO), true) - LOCAL_NDK_VERSION := 4 - LOCAL_SDK_VERSION := 8 -endif - LOCAL_MODULE := libjni_latinime LOCAL_MODULE_TAGS := optional diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index f130062a1..d10dc962e 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -173,12 +173,12 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlo return count; } -static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict, +static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict, jintArray wordArray, jint wordLength) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return (jboolean) false; jint *word = env->GetIntArrayElements(wordArray, 0); - jboolean result = dictionary->isValidWord(word, wordLength); + jint result = dictionary->getFrequency(word, wordLength); env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT); return result; } @@ -253,7 +253,7 @@ static JNINativeMethod sMethods[] = { {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, - {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord}, + {"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency}, {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram}, {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)F", diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index ac2a26172..eb4bf8d1a 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -117,14 +117,22 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); uint16_t bigramBuffer[MAX_WORD_LENGTH]; + int unigramFreq; const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH, - bigramBuffer); + bigramBuffer, &unigramFreq); // codesSize == 0 means we are trying to find bigram predictions. if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) { - const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; + const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; + // Due to space constraints, the frequency for bigrams is approximate - the lower the + // unigram frequency, the worse the precision. The theoritical maximum error in + // resulting frequency is 8 - although in the practice it's never bigger than 3 or 4 + // in very bad cases. This means that sometimes, we'll see some bigrams interverted + // here, but it can't get too bad. + const int frequency = + BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq); if (addWordBigram(bigramBuffer, length, frequency)) { ++bigramCount; } @@ -149,8 +157,8 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, } else { pos = BinaryFormat::skipOtherCharacters(root, pos); } - pos = BinaryFormat::skipChildrenPosition(flags, pos); pos = BinaryFormat::skipFrequency(flags, pos); + pos = BinaryFormat::skipChildrenPosition(flags, pos); pos = BinaryFormat::skipShortcuts(root, flags, pos); return pos; } diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 40f197619..51bf8ebbc 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -66,7 +66,8 @@ class BinaryFormat { static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord, const int length); static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth, - uint16_t* outWord); + uint16_t* outWord, int* outUnigramFrequency); + static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq); @@ -390,10 +391,11 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, * address: the byte position of the last chargroup of the word we are searching for (this is * what is stored as the "bigram address" in each bigram) * outword: an array to write the found word, with MAX_WORD_LENGTH size. + * outUnigramFrequency: a pointer to an int to write the frequency into. * Return value : the length of the word, of 0 if the word was not found. */ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address, - const int maxDepth, uint16_t* outWord) { + const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) { int pos = 0; int wordPos = 0; @@ -421,11 +423,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug int charCount = maxDepth; - while (-1 != nextChar && --charCount > 0) { + while (NOT_A_CHARACTER != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; nextChar = getCharCodeAndForwardPointer(root, &pos); } } + *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos); return ++wordPos; } // We need to skip past this char group, so skip any remaining chars after the @@ -529,6 +532,16 @@ static inline int backoff(const int unigramFreq) { // return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8); } +inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) { + // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the + // unigram frequency to be the median value of the 17th step from the top. A value of + // 0 for the bigram frequency represents the middle of the 16th step from the top, + // while a value of 15 represents the middle of the top step. + // See makedict.BinaryDictInputOutput for details. + const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); + return (int)(unigramFreq + (bigramFreq + 1) * stepSize); +} + // This returns a probability in log space. inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq) { @@ -537,13 +550,7 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int, const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position); if (bigramFreqIt != bigramMap->end()) { const int bigramFreq = bigramFreqIt->second; - // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the - // unigram frequency to be the median value of the 17th step from the top. A value of - // 0 for the bigram frequency represents the middle of the 16th step from the top, - // while a value of 15 represents the middle of the top step. - // See makedict.BinaryDictInputOutput for details. - const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); - return (int)(unigramFreq + bigramFreq * stepSize); + return computeFrequencyForBigram(unigramFreq, bigramFreq); } else { return backoff(unigramFreq); } diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index fe3f292c1..f7ef7efc0 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -55,6 +55,7 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, } AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); + (void)c; } } } diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 65d0f73a3..1fb02478b 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -55,8 +55,8 @@ Dictionary::~Dictionary() { delete mBigramDictionary; } -bool Dictionary::isValidWord(const int32_t *word, int length) { - return mUnigramDictionary->isValidWord(word, length); +int Dictionary::getFrequency(const int32_t *word, int length) { + return mUnigramDictionary->getFrequency(word, length); } bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 87891ee4d..9f2367904 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -52,7 +52,7 @@ class Dictionary { maxWordLength, maxBigrams); } - bool isValidWord(const int32_t *word, int length); + int getFrequency(const int32_t *word, int length); bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 828582848..d68265afb 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -222,6 +222,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; + (void)s; AKLOGI("%s %i", s, frequencies[j]); } } @@ -747,8 +748,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor return maxFreq; } -bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const { - return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length); +int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const { + const uint8_t* const root = DICT_ROOT; + int pos = BinaryFormat::getTerminalPosition(root, inWord, length); + if (NOT_VALID_WORD == pos) { + return NOT_A_PROBABILITY; + } + const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags)); + if (hasMultipleChars) { + pos = BinaryFormat::skipOtherCharacters(root, pos); + } else { + BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos); + } + const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); + return unigramFreq; } // TODO: remove this function. diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index b9233518f..b70894004 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -72,7 +72,7 @@ class UnigramDictionary { UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - bool isValidWord(const int32_t* const inWord, const int length) const; + int getFrequency(const int32_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, |