aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk28
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp6
-rw-r--r--native/jni/src/bigram_dictionary.cpp14
-rw-r--r--native/jni/src/binary_format.h27
-rw-r--r--native/jni/src/correction.cpp1
-rw-r--r--native/jni/src/dictionary.cpp4
-rw-r--r--native/jni/src/dictionary.h2
-rw-r--r--native/jni/src/unigram_dictionary.cpp18
-rw-r--r--native/jni/src/unigram_dictionary.h2
9 files changed, 52 insertions, 50 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 5e0d3518d..d53757fd4 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -20,24 +20,6 @@ LOCAL_PATH := $(call my-dir)
#FLAG_DBG := true
#FLAG_DO_PROFILE := true
-TARGETING_UNBUNDLED_FROYO := true
-
-ifeq ($(TARGET_ARCH), x86)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(TARGET_ARCH), mips)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(FLAG_DBG), true)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(FLAG_DO_PROFILE), true)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
######################################
include $(CLEAR_VARS)
@@ -69,11 +51,6 @@ LOCAL_SRC_FILES := \
$(LATIN_IME_JNI_SRC_FILES) \
$(addprefix $(LATIN_IME_SRC_DIR)/,$(LATIN_IME_CORE_SRC_FILES))
-ifeq ($(TARGETING_UNBUNDLED_FROYO), true)
- LOCAL_NDK_VERSION := 4
- LOCAL_SDK_VERSION := 8
-endif
-
ifeq ($(FLAG_DO_PROFILE), true)
$(warning Making profiling version of native library)
LOCAL_CFLAGS += -DFLAG_DO_PROFILE
@@ -117,11 +94,6 @@ ifeq ($(FLAG_DBG), true)
endif # FLAG_DBG
endif # FLAG_DO_PROFILE
-ifeq ($(TARGETING_UNBUNDLED_FROYO), true)
- LOCAL_NDK_VERSION := 4
- LOCAL_SDK_VERSION := 8
-endif
-
LOCAL_MODULE := libjni_latinime
LOCAL_MODULE_TAGS := optional
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index f130062a1..d10dc962e 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -173,12 +173,12 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlo
return count;
}
-static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
+static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict,
jintArray wordArray, jint wordLength) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return (jboolean) false;
jint *word = env->GetIntArrayElements(wordArray, 0);
- jboolean result = dictionary->isValidWord(word, wordLength);
+ jint result = dictionary->getFrequency(word, wordLength);
env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
return result;
}
@@ -253,7 +253,7 @@ static JNINativeMethod sMethods[] = {
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
(void*)latinime_BinaryDictionary_getSuggestions},
- {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
+ {"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency},
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
{"calcNormalizedScoreNative", "([CI[CII)F",
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index ac2a26172..eb4bf8d1a 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -117,14 +117,22 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
uint16_t bigramBuffer[MAX_WORD_LENGTH];
+ int unigramFreq;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
- bigramBuffer);
+ bigramBuffer, &unigramFreq);
// codesSize == 0 means we are trying to find bigram predictions.
if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) {
- const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+ // Due to space constraints, the frequency for bigrams is approximate - the lower the
+ // unigram frequency, the worse the precision. The theoretical maximum error in the
+ // resulting frequency is 8 - although in practice it's never bigger than 3 or 4
+ // in very bad cases. This means that sometimes, we'll see some bigrams swapped
+ // here, but it can't get too bad.
+ const int frequency =
+ BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq);
if (addWordBigram(bigramBuffer, length, frequency)) {
++bigramCount;
}
@@ -149,8 +157,8 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
} else {
pos = BinaryFormat::skipOtherCharacters(root, pos);
}
- pos = BinaryFormat::skipChildrenPosition(flags, pos);
pos = BinaryFormat::skipFrequency(flags, pos);
+ pos = BinaryFormat::skipChildrenPosition(flags, pos);
pos = BinaryFormat::skipShortcuts(root, flags, pos);
return pos;
}
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index 40f197619..51bf8ebbc 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -66,7 +66,8 @@ class BinaryFormat {
static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
const int length);
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
- uint16_t* outWord);
+ uint16_t* outWord, int* outUnigramFrequency);
+ static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@@ -390,10 +391,11 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
* address: the byte position of the last chargroup of the word we are searching for (this is
* what is stored as the "bigram address" in each bigram)
* outword: an array to write the found word, with MAX_WORD_LENGTH size.
+ * outUnigramFrequency: a pointer to an int to write the frequency into.
* Return value : the length of the word, or 0 if the word was not found.
*/
inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address,
- const int maxDepth, uint16_t* outWord) {
+ const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) {
int pos = 0;
int wordPos = 0;
@@ -421,11 +423,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
// We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug
int charCount = maxDepth;
- while (-1 != nextChar && --charCount > 0) {
+ while (NOT_A_CHARACTER != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
nextChar = getCharCodeAndForwardPointer(root, &pos);
}
}
+ *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
return ++wordPos;
}
// We need to skip past this char group, so skip any remaining chars after the
@@ -529,6 +532,16 @@ static inline int backoff(const int unigramFreq) {
// return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8);
}
+inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) {
+ // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
+ // unigram frequency to be the median value of the 17th step from the top. A value of
+ // 0 for the bigram frequency represents the middle of the 16th step from the top,
+ // while a value of 15 represents the middle of the top step.
+ // See makedict.BinaryDictInputOutput for details.
+ const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
+ return (int)(unigramFreq + (bigramFreq + 1) * stepSize);
+}
+
// This returns a probability in log space.
inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq) {
@@ -537,13 +550,7 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int,
const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position);
if (bigramFreqIt != bigramMap->end()) {
const int bigramFreq = bigramFreqIt->second;
- // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
- // unigram frequency to be the median value of the 17th step from the top. A value of
- // 0 for the bigram frequency represents the middle of the 16th step from the top,
- // while a value of 15 represents the middle of the top step.
- // See makedict.BinaryDictInputOutput for details.
- const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
- return (int)(unigramFreq + bigramFreq * stepSize);
+ return computeFrequencyForBigram(unigramFreq, bigramFreq);
} else {
return backoff(unigramFreq);
}
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index fe3f292c1..f7ef7efc0 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -55,6 +55,7 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
}
AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
+ (void)c;
}
}
}
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 65d0f73a3..1fb02478b 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -55,8 +55,8 @@ Dictionary::~Dictionary() {
delete mBigramDictionary;
}
-bool Dictionary::isValidWord(const int32_t *word, int length) {
- return mUnigramDictionary->isValidWord(word, length);
+int Dictionary::getFrequency(const int32_t *word, int length) {
+ return mUnigramDictionary->getFrequency(word, length);
}
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index 87891ee4d..9f2367904 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -52,7 +52,7 @@ class Dictionary {
maxWordLength, maxBigrams);
}
- bool isValidWord(const int32_t *word, int length);
+ int getFrequency(const int32_t *word, int length);
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
void *getDict() { return (void *)mDict; }
int getDictSize() { return mDictSize; }
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index 828582848..d68265afb 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -222,6 +222,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
+ (void)s;
AKLOGI("%s %i", s, frequencies[j]);
}
}
@@ -747,8 +748,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
return maxFreq;
}
-bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
- return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
+int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const {
+ const uint8_t* const root = DICT_ROOT;
+ int pos = BinaryFormat::getTerminalPosition(root, inWord, length);
+ if (NOT_VALID_WORD == pos) {
+ return NOT_A_PROBABILITY;
+ }
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags));
+ if (hasMultipleChars) {
+ pos = BinaryFormat::skipOtherCharacters(root, pos);
+ } else {
+ BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
+ }
+ const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
+ return unigramFreq;
}
// TODO: remove this function.
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index b9233518f..b70894004 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -72,7 +72,7 @@ class UnigramDictionary {
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
- bool isValidWord(const int32_t* const inWord, const int length) const;
+ int getFrequency(const int32_t* const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
Correction *correction, const int *xcoordinates, const int *ycoordinates,