about | summary | refs | log | tree | commit | diff | stats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r-- native/jni/Android.mk | 31
-rw-r--r-- native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp | 32
-rw-r--r-- native/jni/com_android_inputmethod_keyboard_ProximityInfo.h | 6
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp | 130
-rw-r--r-- native/jni/com_android_inputmethod_latin_BinaryDictionary.h | 6
-rw-r--r-- native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp | 14
-rw-r--r-- native/jni/com_android_inputmethod_latin_DicTraverseSession.h | 5
-rw-r--r-- native/jni/jni_common.cpp | 39
-rw-r--r-- native/jni/jni_common.h | 4
-rw-r--r-- native/jni/src/additional_proximity_chars.cpp | 10
-rw-r--r-- native/jni/src/additional_proximity_chars.h | 17
-rw-r--r-- native/jni/src/basechars.cpp | 194
-rw-r--r-- native/jni/src/bigram_dictionary.cpp | 103
-rw-r--r-- native/jni/src/bigram_dictionary.h | 27
-rw-r--r-- native/jni/src/binary_format.h | 201
-rw-r--r-- native/jni/src/char_utils.cpp | 346
-rw-r--r-- native/jni/src/char_utils.h | 40
-rw-r--r-- native/jni/src/correction.cpp | 256
-rw-r--r-- native/jni/src/correction.h | 185
-rw-r--r-- native/jni/src/defines.h | 259
-rw-r--r-- native/jni/src/dic_traverse_wrapper.h | 15
-rw-r--r-- native/jni/src/dictionary.cpp | 63
-rw-r--r-- native/jni/src/dictionary.h | 57
-rw-r--r-- native/jni/src/geometry_utils.h | 52
-rw-r--r-- native/jni/src/gesture/gesture_decoder_wrapper.h | 70
-rw-r--r-- native/jni/src/gesture/incremental_decoder_wrapper.h | 71
-rw-r--r-- native/jni/src/hash_map_compat.h | 4
-rw-r--r-- native/jni/src/proximity_info.cpp | 176
-rw-r--r-- native/jni/src/proximity_info.h | 115
-rw-r--r-- native/jni/src/proximity_info_params.cpp | 103
-rw-r--r-- native/jni/src/proximity_info_params.h | 108
-rw-r--r-- native/jni/src/proximity_info_state.cpp | 651
-rw-r--r-- native/jni/src/proximity_info_state.h | 289
-rw-r--r-- native/jni/src/proximity_info_state_utils.cpp | 1073
-rw-r--r-- native/jni/src/proximity_info_state_utils.h | 163
-rw-r--r-- native/jni/src/proximity_info_utils.h | 242
-rw-r--r-- native/jni/src/suggest/gesture_suggest.cpp (renamed from native/jni/src/gesture/gesture_decoder_wrapper.cpp) | 9
-rw-r--r-- native/jni/src/suggest/gesture_suggest.h | 61
-rw-r--r-- native/jni/src/suggest/suggest_interface.h (renamed from native/jni/src/gesture/incremental_decoder_interface.h) | 25
-rw-r--r-- native/jni/src/suggest/typing_suggest.cpp (renamed from native/jni/src/gesture/incremental_decoder_wrapper.cpp) | 9
-rw-r--r-- native/jni/src/suggest/typing_suggest.h | 61
-rw-r--r-- native/jni/src/suggest_utils.h | 57
-rw-r--r-- native/jni/src/terminal_attributes.h | 22
-rw-r--r-- native/jni/src/unigram_dictionary.cpp | 185
-rw-r--r-- native/jni/src/unigram_dictionary.h | 74
-rw-r--r-- native/jni/src/words_priority_queue.cpp | 76
-rw-r--r-- native/jni/src/words_priority_queue.h | 126
-rw-r--r-- native/jni/src/words_priority_queue_pool.h | 22
48 files changed, 3571 insertions, 2313 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 874d10639..3735ec07b 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -26,9 +26,16 @@ include $(CLEAR_VARS)
LATIN_IME_SRC_DIR := src
LATIN_IME_SRC_FULLPATH_DIR := $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
-LOCAL_C_INCLUDES += $(LATIN_IME_SRC_FULLPATH_DIR) $(LATIN_IME_SRC_FULLPATH_DIR)/gesture
+LOCAL_C_INCLUDES += $(LATIN_IME_SRC_FULLPATH_DIR) $(LATIN_IME_SRC_FULLPATH_DIR)/suggest
-LOCAL_CFLAGS += -Werror -Wall
+LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \
+ -Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls -Wno-system-headers
+
+ifeq ($(TARGET_ARCH), arm)
+ifneq ($(TARGET_GCC_VERSION), 4.7)
+LOCAL_CFLAGS += -Winline
+endif # TARGET_GCC_VERSION
+endif # TARGET_ARCH
# To suppress compiler warnings for unused variables/functions used for debug features etc.
LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
@@ -41,17 +48,19 @@ LATIN_IME_JNI_SRC_FILES := \
LATIN_IME_CORE_SRC_FILES := \
additional_proximity_chars.cpp \
- basechars.cpp \
bigram_dictionary.cpp \
char_utils.cpp \
correction.cpp \
dictionary.cpp \
dic_traverse_wrapper.cpp \
proximity_info.cpp \
+ proximity_info_params.cpp \
proximity_info_state.cpp \
+ proximity_info_state_utils.cpp \
unigram_dictionary.cpp \
- gesture/gesture_decoder_wrapper.cpp \
- gesture/incremental_decoder_wrapper.cpp
+ words_priority_queue.cpp \
+ suggest/gesture_suggest.cpp \
+ suggest/typing_suggest.cpp
LOCAL_SRC_FILES := \
$(LATIN_IME_JNI_SRC_FILES) \
@@ -59,11 +68,15 @@ LOCAL_SRC_FILES := \
ifeq ($(FLAG_DO_PROFILE), true)
$(warning Making profiling version of native library)
- LOCAL_CFLAGS += -DFLAG_DO_PROFILE
+ LOCAL_CFLAGS += -DFLAG_DO_PROFILE -funwind-tables -fno-inline
else # FLAG_DO_PROFILE
ifeq ($(FLAG_DBG), true)
$(warning Making debug version of native library)
- LOCAL_CFLAGS += -DFLAG_DBG
+ LOCAL_CFLAGS += -DFLAG_DBG -funwind-tables -fno-inline
+ifeq ($(FLAG_FULL_DBG), true)
+ $(warning Making full debug version of native library)
+ LOCAL_CFLAGS += -DFLAG_FULL_DBG
+endif # FLAG_FULL_DBG
endif # FLAG_DBG
endif # FLAG_DO_PROFILE
@@ -82,11 +95,11 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libjni_latinime_common_static
ifeq ($(FLAG_DO_PROFILE), true)
$(warning Making profiling version of native library)
- LOCAL_SHARED_LIBRARIES += liblog
+ LOCAL_LDFLAGS += -llog
else # FLAG_DO_PROFILE
ifeq ($(FLAG_DBG), true)
$(warning Making debug version of native library)
- LOCAL_SHARED_LIBRARIES += liblog
+ LOCAL_LDFLAGS += -llog
endif # FLAG_DBG
endif # FLAG_DO_PROFILE
diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
index 560b3a533..30ca3f1b8 100644
--- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
+++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2011, The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,39 +17,39 @@
#define LOG_TAG "LatinIME: jni: ProximityInfo"
#include "com_android_inputmethod_keyboard_ProximityInfo.h"
+#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "proximity_info.h"
namespace latinime {
-static jlong latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
- jstring localeJStr, jint maxProximityCharsSize, jint displayWidth, jint displayHeight,
- jint gridWidth, jint gridHeight, jint mostCommonkeyWidth, jintArray proximityChars,
- jint keyCount, jintArray keyXCoordinates, jintArray keyYCoordinates,
- jintArray keyWidths, jintArray keyHeights, jintArray keyCharCodes,
- jfloatArray sweetSpotCenterXs, jfloatArray sweetSpotCenterYs, jfloatArray sweetSpotRadii) {
- ProximityInfo *proximityInfo = new ProximityInfo(env, localeJStr, maxProximityCharsSize,
- displayWidth, displayHeight, gridWidth, gridHeight, mostCommonkeyWidth, proximityChars,
- keyCount, keyXCoordinates, keyYCoordinates, keyWidths, keyHeights, keyCharCodes,
+static jlong latinime_Keyboard_setProximityInfo(JNIEnv *env, jclass clazz, jstring localeJStr,
+ jint displayWidth, jint displayHeight, jint gridWidth, jint gridHeight,
+ jint mostCommonkeyWidth, jintArray proximityChars, jint keyCount,
+ jintArray keyXCoordinates, jintArray keyYCoordinates, jintArray keyWidths,
+ jintArray keyHeights, jintArray keyCharCodes, jfloatArray sweetSpotCenterXs,
+ jfloatArray sweetSpotCenterYs, jfloatArray sweetSpotRadii) {
+ ProximityInfo *proximityInfo = new ProximityInfo(env, localeJStr, displayWidth, displayHeight,
+ gridWidth, gridHeight, mostCommonkeyWidth, proximityChars, keyCount,
+ keyXCoordinates, keyYCoordinates, keyWidths, keyHeights, keyCharCodes,
sweetSpotCenterXs, sweetSpotCenterYs, sweetSpotRadii);
return reinterpret_cast<jlong>(proximityInfo);
}
-static void latinime_Keyboard_release(JNIEnv *env, jobject object, jlong proximityInfo) {
+static void latinime_Keyboard_release(JNIEnv *env, jclass clazz, jlong proximityInfo) {
ProximityInfo *pi = reinterpret_cast<ProximityInfo *>(proximityInfo);
delete pi;
}
-static JNINativeMethod sKeyboardMethods[] = {
- {"setProximityInfoNative", "(Ljava/lang/String;IIIIII[II[I[I[I[I[I[F[F[F)J",
+static JNINativeMethod sMethods[] = {
+ {"setProximityInfoNative", "(Ljava/lang/String;IIIII[II[I[I[I[I[I[F[F[F)J",
reinterpret_cast<void *>(latinime_Keyboard_setProximityInfo)},
{"releaseProximityInfoNative", "(J)V", reinterpret_cast<void *>(latinime_Keyboard_release)}
};
int register_ProximityInfo(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/keyboard/ProximityInfo";
- return registerNativeMethods(env, kClassPathName, sKeyboardMethods,
- sizeof(sKeyboardMethods) / sizeof(sKeyboardMethods[0]));
+ return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime
diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h
index 51fa895d3..c3503c8c3 100644
--- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h
+++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2011, The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,8 +20,6 @@
#include "jni.h"
namespace latinime {
-
int register_ProximityInfo(JNIEnv *env);
-
} // namespace latinime
#endif // _COM_ANDROID_INPUTMETHOD_KEYBOARD_PROXIMITYINFO_H
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index dd2513f9f..ca38b0de5 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2009, The Android Open Source Project
+ * Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,6 @@
* limitations under the License.
*/
-
#include <cstring> // for memset()
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
@@ -43,10 +42,8 @@ class ProximityInfo;
static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd);
-static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
- jstring sourceDir, jlong dictOffset, jlong dictSize,
- jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords,
- jint maxPredictions) {
+static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
+ jlong dictOffset, jlong dictSize) {
PROF_OPEN;
PROF_START(66);
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
@@ -71,7 +68,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
adjust = static_cast<int>(dictOffset) % pagesize;
int adjDictOffset = static_cast<int>(dictOffset) - adjust;
int adjDictSize = static_cast<int>(dictSize) + adjust;
- dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
+ dictBuf = mmap(0, adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
return 0;
@@ -85,17 +82,17 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
AKLOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
return 0;
}
- dictBuf = malloc(sizeof(char) * dictSize);
+ dictBuf = malloc(dictSize);
if (!dictBuf) {
AKLOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno);
return 0;
}
- int ret = fseek(file, (long)dictOffset, SEEK_SET);
+ int ret = fseek(file, static_cast<long>(dictOffset), SEEK_SET);
if (ret != 0) {
AKLOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno);
return 0;
}
- ret = fread(dictBuf, sizeof(char) * dictSize, 1, file);
+ ret = fread(dictBuf, dictSize, 1, file);
if (ret != 1) {
AKLOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno);
return 0;
@@ -120,20 +117,19 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
releaseDictBuf(dictBuf, 0, 0);
#endif // USE_MMAP_FOR_DICTIONARY
} else {
- dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust,
- typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
+ dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust);
}
PROF_END(66);
PROF_CLOSE;
- return (jlong)dictionary;
+ return reinterpret_cast<jlong>(dictionary);
}
-static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jlong dict,
+static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
- jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture,
+ jintArray inputCodePointsArray, jint inputSize, jint commitPoint, jboolean isGesture,
jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance,
- jcharArray outputCharsArray, jintArray scoresArray, jintArray spaceIndicesArray,
+ jintArray outputCodePointsArray, jintArray scoresArray, jintArray spaceIndicesArray,
jintArray outputTypesArray) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
@@ -141,20 +137,20 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
void *traverseSession = reinterpret_cast<void *>(dicTraverseSession);
// Input values
- int xCoordinates[arraySize];
- int yCoordinates[arraySize];
- int times[arraySize];
- int pointerIds[arraySize];
+ int xCoordinates[inputSize];
+ int yCoordinates[inputSize];
+ int times[inputSize];
+ int pointerIds[inputSize];
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
int inputCodePoints[inputCodePointsLength];
const jsize prevWordCodePointsLength =
prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0;
int prevWordCodePointsInternal[prevWordCodePointsLength];
int *prevWordCodePoints = 0;
- env->GetIntArrayRegion(xCoordinatesArray, 0, arraySize, xCoordinates);
- env->GetIntArrayRegion(yCoordinatesArray, 0, arraySize, yCoordinates);
- env->GetIntArrayRegion(timesArray, 0, arraySize, times);
- env->GetIntArrayRegion(pointerIdsArray, 0, arraySize, pointerIds);
+ env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
+ env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
+ env->GetIntArrayRegion(timesArray, 0, inputSize, times);
+ env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
if (prevWordCodePointsForBigrams) {
env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength,
@@ -163,34 +159,43 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
}
// Output values
- // TODO: Should be "outputCodePointsLength" and "int outputCodePoints[]"
- const jsize outputCharsLength = env->GetArrayLength(outputCharsArray);
- unsigned short outputChars[outputCharsLength];
+ /* By the way, let's check the output array length here to make sure */
+ const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
+ if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
+ AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
+ ASSERT(false);
+ return 0;
+ }
const jsize scoresLength = env->GetArrayLength(scoresArray);
+ if (scoresLength != MAX_RESULTS) {
+ AKLOGE("Invalid scoresLength: %d", scoresLength);
+ ASSERT(false);
+ return 0;
+ }
+ int outputCodePoints[outputCodePointsLength];
int scores[scoresLength];
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
int spaceIndices[spaceIndicesLength];
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
int outputTypes[outputTypesLength];
- memset(outputChars, 0, sizeof(outputChars));
+ memset(outputCodePoints, 0, sizeof(outputCodePoints));
memset(scores, 0, sizeof(scores));
memset(spaceIndices, 0, sizeof(spaceIndices));
memset(outputTypes, 0, sizeof(outputTypes));
int count;
- if (isGesture || arraySize > 0) {
+ if (isGesture || inputSize > 0) {
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
- times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints,
- prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, outputChars,
- scores, spaceIndices, outputTypes);
+ times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints,
+ prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance,
+ outputCodePoints, scores, spaceIndices, outputTypes);
} else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
- inputCodePoints, arraySize, outputChars, scores, outputTypes);
+ inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
}
// Copy back the output values
- // TODO: Should be SetIntArrayRegion()
- env->SetCharArrayRegion(outputCharsArray, 0, outputCharsLength, outputChars);
+ env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
@@ -198,7 +203,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
return count;
}
-static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict,
+static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jclass clazz, jlong dict,
jintArray wordArray) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
@@ -208,10 +213,10 @@ static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object,
return dictionary->getFrequency(codePoints, codePointLength);
}
-static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict,
+static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
jintArray wordArray1, jintArray wordArray2) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) return (jboolean) false;
+ if (!dictionary) return JNI_FALSE;
const jsize codePointLength1 = env->GetArrayLength(wordArray1);
const jsize codePointLength2 = env->GetArrayLength(wordArray2);
int codePoints1[codePointLength1];
@@ -221,33 +226,31 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject obj
return dictionary->isValidBigram(codePoints1, codePointLength1, codePoints2, codePointLength2);
}
-static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
- jcharArray before, jcharArray after, jint score) {
+static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
+ jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before);
jsize afterLength = env->GetArrayLength(after);
- jchar beforeChars[beforeLength];
- jchar afterChars[afterLength];
- env->GetCharArrayRegion(before, 0, beforeLength, beforeChars);
- env->GetCharArrayRegion(after, 0, afterLength, afterChars);
- return Correction::RankingAlgorithm::calcNormalizedScore(
- static_cast<unsigned short *>(beforeChars), beforeLength,
- static_cast<unsigned short *>(afterChars), afterLength, score);
+ int beforeCodePoints[beforeLength];
+ int afterCodePoints[afterLength];
+ env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
+ env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
+ return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
+ afterCodePoints, afterLength, score);
}
-static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
- jcharArray before, jcharArray after) {
+static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, jintArray before,
+ jintArray after) {
jsize beforeLength = env->GetArrayLength(before);
jsize afterLength = env->GetArrayLength(after);
- jchar beforeChars[beforeLength];
- jchar afterChars[afterLength];
- env->GetCharArrayRegion(before, 0, beforeLength, beforeChars);
- env->GetCharArrayRegion(after, 0, afterLength, afterChars);
- return Correction::RankingAlgorithm::editDistance(
- static_cast<unsigned short *>(beforeChars), beforeLength,
- static_cast<unsigned short *>(afterChars), afterLength);
+ int beforeCodePoints[beforeLength];
+ int afterCodePoints[afterLength];
+ env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
+ env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
+ return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
+ afterCodePoints, afterLength);
}
-static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
+static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const void *dictBuf = dictionary->getDict();
@@ -277,24 +280,23 @@ static void releaseDictBuf(const void *dictBuf, const size_t length, const int f
}
static JNINativeMethod sMethods[] = {
- {"openNative", "(Ljava/lang/String;JJIIIII)J",
+ {"openNative", "(Ljava/lang/String;JJ)J",
reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
- {"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I",
+ {"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
{"getFrequencyNative", "(J[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
{"isValidBigramNative", "(J[I[I)Z",
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
- {"calcNormalizedScoreNative", "([C[CI)F",
+ {"calcNormalizedScoreNative", "([I[II)F",
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
- {"editDistanceNative", "([C[C)I",
+ {"editDistanceNative", "([I[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
};
int register_BinaryDictionary(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
- return registerNativeMethods(env, kClassPathName, sMethods,
- sizeof(sMethods) / sizeof(sMethods[0]));
+ return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.h b/native/jni/com_android_inputmethod_latin_BinaryDictionary.h
index b9e944f07..2a07f9936 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.h
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.h
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2011, The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,8 +20,6 @@
#include "jni.h"
namespace latinime {
-
int register_BinaryDictionary(JNIEnv *env);
-
} // namespace latinime
#endif // _COM_ANDROID_INPUTMETHOD_LATIN_BINARYDICTIONARY_H
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
index 5d405f117..9b39245b9 100644
--- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
+++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2012, The Android Open Source Project
+ * Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,18 +17,19 @@
#define LOG_TAG "LatinIME: jni: Session"
#include "com_android_inputmethod_latin_DicTraverseSession.h"
+#include "defines.h"
#include "dic_traverse_wrapper.h"
#include "jni.h"
#include "jni_common.h"
namespace latinime {
class Dictionary;
-static jlong latinime_setDicTraverseSession(JNIEnv *env, jobject object, jstring localeJStr) {
+static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring localeJStr) {
void *traverseSession = DicTraverseWrapper::getDicTraverseSession(env, localeJStr);
return reinterpret_cast<jlong>(traverseSession);
}
-static void latinime_initDicTraverseSession(JNIEnv *env, jobject object, jlong traverseSession,
+static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession,
jlong dictionary, jintArray previousWord, jint previousWordLength) {
void *ts = reinterpret_cast<void *>(traverseSession);
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
@@ -41,7 +42,7 @@ static void latinime_initDicTraverseSession(JNIEnv *env, jobject object, jlong t
DicTraverseWrapper::initDicTraverseSession(ts, dict, prevWord, previousWordLength);
}
-static void latinime_releaseDicTraverseSession(JNIEnv *env, jobject object, jlong traverseSession) {
+static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
void *ts = reinterpret_cast<void *>(traverseSession);
DicTraverseWrapper::releaseDicTraverseSession(ts);
}
@@ -57,7 +58,6 @@ static JNINativeMethod sMethods[] = {
int register_DicTraverseSession(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/latin/DicTraverseSession";
- return registerNativeMethods(env, kClassPathName, sMethods,
- sizeof(sMethods) / sizeof(sMethods[0]));
+ return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.h b/native/jni/com_android_inputmethod_latin_DicTraverseSession.h
index 37531e96b..badcbb9ea 100644
--- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.h
+++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.h
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2012, The Android Open Source Project
+ * Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,7 +17,6 @@
#ifndef _COM_ANDROID_INPUTMETHOD_LATIN_DICTRAVERSESESSION_H
#define _COM_ANDROID_INPUTMETHOD_LATIN_DICTRAVERSESESSION_H
-#include "defines.h"
#include "jni.h"
namespace latinime {
diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp
index 0da166903..1ea204102 100644
--- a/native/jni/jni_common.cpp
+++ b/native/jni/jni_common.cpp
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2011, The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -16,8 +16,6 @@
#define LOG_TAG "LatinIME: jni"
-#include <cassert>
-
#include "com_android_inputmethod_keyboard_ProximityInfo.h"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include "com_android_inputmethod_latin_DicTraverseSession.h"
@@ -25,45 +23,38 @@
#include "jni.h"
#include "jni_common.h"
-using namespace latinime;
-
/*
* Returns the JNI version on success, -1 on failure.
*/
jint JNI_OnLoad(JavaVM *vm, void *reserved) {
JNIEnv *env = 0;
- jint result = -1;
if (vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_6) != JNI_OK) {
AKLOGE("ERROR: GetEnv failed");
- goto bail;
+ return -1;
}
- assert(env);
-
- if (!register_BinaryDictionary(env)) {
+ ASSERT(env);
+ if (!env) {
+ AKLOGE("ERROR: JNIEnv is invalid");
+ return -1;
+ }
+ if (!latinime::register_BinaryDictionary(env)) {
AKLOGE("ERROR: BinaryDictionary native registration failed");
- goto bail;
+ return -1;
}
-
- if (!register_DicTraverseSession(env)) {
+ if (!latinime::register_DicTraverseSession(env)) {
AKLOGE("ERROR: DicTraverseSession native registration failed");
- goto bail;
+ return -1;
}
-
- if (!register_ProximityInfo(env)) {
+ if (!latinime::register_ProximityInfo(env)) {
AKLOGE("ERROR: ProximityInfo native registration failed");
- goto bail;
+ return -1;
}
-
/* success -- return valid version number */
- result = JNI_VERSION_1_6;
-
-bail:
- return result;
+ return JNI_VERSION_1_6;
}
namespace latinime {
-
int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *methods,
int numMethods) {
jclass clazz = env->FindClass(className);
diff --git a/native/jni/jni_common.h b/native/jni/jni_common.h
index 993f97e80..f960b05a6 100644
--- a/native/jni/jni_common.h
+++ b/native/jni/jni_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011, The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,9 +20,7 @@
#include "jni.h"
namespace latinime {
-
int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *methods,
int numMethods);
-
} // namespace latinime
#endif // LATINIME_JNI_COMMON_H
diff --git a/native/jni/src/additional_proximity_chars.cpp b/native/jni/src/additional_proximity_chars.cpp
index f59492741..661c50e91 100644
--- a/native/jni/src/additional_proximity_chars.cpp
+++ b/native/jni/src/additional_proximity_chars.cpp
@@ -21,23 +21,23 @@ namespace latinime {
// TODO: Have proximity character informations in each language's binary dictionary.
const char *AdditionalProximityChars::LOCALE_EN_US = "en";
-const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_A[EN_US_ADDITIONAL_A_SIZE] = {
+const int AdditionalProximityChars::EN_US_ADDITIONAL_A[EN_US_ADDITIONAL_A_SIZE] = {
'e', 'i', 'o', 'u'
};
-const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_E[EN_US_ADDITIONAL_E_SIZE] = {
+const int AdditionalProximityChars::EN_US_ADDITIONAL_E[EN_US_ADDITIONAL_E_SIZE] = {
'a', 'i', 'o', 'u'
};
-const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_I[EN_US_ADDITIONAL_I_SIZE] = {
+const int AdditionalProximityChars::EN_US_ADDITIONAL_I[EN_US_ADDITIONAL_I_SIZE] = {
'a', 'e', 'o', 'u'
};
-const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_O[EN_US_ADDITIONAL_O_SIZE] = {
+const int AdditionalProximityChars::EN_US_ADDITIONAL_O[EN_US_ADDITIONAL_O_SIZE] = {
'a', 'e', 'i', 'u'
};
-const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_U[EN_US_ADDITIONAL_U_SIZE] = {
+const int AdditionalProximityChars::EN_US_ADDITIONAL_U[EN_US_ADDITIONAL_U_SIZE] = {
'a', 'e', 'i', 'o'
};
} // namespace latinime
diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h
index d420c4664..a88fd6cea 100644
--- a/native/jni/src/additional_proximity_chars.h
+++ b/native/jni/src/additional_proximity_chars.h
@@ -18,7 +18,6 @@
#define LATINIME_ADDITIONAL_PROXIMITY_CHARS_H
#include <cstring>
-#include <stdint.h>
#include "defines.h"
@@ -29,24 +28,24 @@ class AdditionalProximityChars {
DISALLOW_IMPLICIT_CONSTRUCTORS(AdditionalProximityChars);
static const char *LOCALE_EN_US;
static const int EN_US_ADDITIONAL_A_SIZE = 4;
- static const int32_t EN_US_ADDITIONAL_A[];
+ static const int EN_US_ADDITIONAL_A[];
static const int EN_US_ADDITIONAL_E_SIZE = 4;
- static const int32_t EN_US_ADDITIONAL_E[];
+ static const int EN_US_ADDITIONAL_E[];
static const int EN_US_ADDITIONAL_I_SIZE = 4;
- static const int32_t EN_US_ADDITIONAL_I[];
+ static const int EN_US_ADDITIONAL_I[];
static const int EN_US_ADDITIONAL_O_SIZE = 4;
- static const int32_t EN_US_ADDITIONAL_O[];
+ static const int EN_US_ADDITIONAL_O[];
static const int EN_US_ADDITIONAL_U_SIZE = 4;
- static const int32_t EN_US_ADDITIONAL_U[];
+ static const int EN_US_ADDITIONAL_U[];
- static bool isEnLocale(const char *localeStr) {
+ AK_FORCE_INLINE static bool isEnLocale(const char *localeStr) {
const size_t LOCALE_EN_US_SIZE = strlen(LOCALE_EN_US);
return localeStr && strlen(localeStr) >= LOCALE_EN_US_SIZE
&& strncmp(localeStr, LOCALE_EN_US, LOCALE_EN_US_SIZE) == 0;
}
public:
- static int getAdditionalCharsSize(const char *localeStr, const int32_t c) {
+ static int getAdditionalCharsSize(const char *const localeStr, const int c) {
if (!isEnLocale(localeStr)) {
return 0;
}
@@ -66,7 +65,7 @@ class AdditionalProximityChars {
}
}
- static const int32_t *getAdditionalChars(const char *localeStr, const int32_t c) {
+ static const int *getAdditionalChars(const char *const localeStr, const int c) {
if (!isEnLocale(localeStr)) {
return 0;
}
diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp
deleted file mode 100644
index d97311ed6..000000000
--- a/native/jni/src/basechars.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdint.h>
-
-#include "char_utils.h"
-
-namespace latinime {
-
-/*
- * Table mapping most combined Latin, Greek, and Cyrillic characters
- * to their base characters. If c is in range, BASE_CHARS[c] == c
- * if c is not a combined character, or the base character if it
- * is combined.
- */
-const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
- 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
- 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
- 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
- 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
- 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
- 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
- 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
- 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
- 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
- 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
- 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
- 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
- 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
- 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
- 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
- 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
- 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
- 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
- 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
- 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
- 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
- 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
- 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
- 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
- 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
- // Manually changed df to 73
- 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
- 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
- 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
- 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
- 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
- 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
- 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
- 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
- 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
- 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
- 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
- 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
- 0x006c, 0x004c, 0x006c, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
- 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
- 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
- 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
- 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
- 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
- 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
- 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
- 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
- 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
- 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
- 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
- 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
- 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
- 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
- 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
- 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
- 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
- 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
- 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
- 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
- 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
- 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
- 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
- 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
- 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
- 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
- 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
- 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
- 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
- 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
- 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
- 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
- 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
- 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
- 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
- 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
- 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
- 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
- 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
- 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
- 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
- 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
- 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
- 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
- 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
- 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
- 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
- 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
- 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
- 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
- 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
- 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
- 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
- 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
- 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
- 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
- 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
- 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
- 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
- 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
- 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
- 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
- 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
- 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
- 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
- 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
- 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
- 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
- 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
- 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
- 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
- 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
- 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
- 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
- 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
- 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
- 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
- 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
- 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
- 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
- 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
- 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
- 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
- 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
- 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
- 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
- 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
- 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
- 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
- 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
- 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
- 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
- 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
- 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
- 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
- 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
- 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
- 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
- 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
- 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
- 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
- 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
- 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
- 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
- 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
- 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
- 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
- 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
- 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
- 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
- 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
- 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
- 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
- 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
- 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
- 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
- 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
- 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
- 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
-};
-// generated with:
-// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
-} // namespace latinime
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index dade4f16b..ef0434c49 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -21,13 +21,13 @@
#include "bigram_dictionary.h"
#include "binary_format.h"
#include "bloom_filter.h"
+#include "char_utils.h"
#include "defines.h"
#include "dictionary.h"
namespace latinime {
-BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions)
- : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_PREDICTIONS(maxPredictions) {
+BigramDictionary::BigramDictionary(const uint8_t *const streamStart) : DICT_ROOT(streamStart) {
if (DEBUG_DICT) {
AKLOGI("BigramDictionary - constructor");
}
@@ -36,57 +36,57 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
BigramDictionary::~BigramDictionary() {
}
-bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency,
- int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const {
+void BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
+ int *bigramCodePoints, int *outputTypes) const {
word[length] = 0;
if (DEBUG_DICT) {
#ifdef FLAG_DBG
char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
+ for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
#endif
}
// Find the right insertion point
int insertAt = 0;
- while (insertAt < MAX_PREDICTIONS) {
+ while (insertAt < MAX_RESULTS) {
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
- && length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) {
+ && length < getCodePointCount(MAX_WORD_LENGTH,
+ bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
break;
}
insertAt++;
}
if (DEBUG_DICT) {
- AKLOGI("Bigram: InsertAt -> %d MAX_PREDICTIONS: %d", insertAt, MAX_PREDICTIONS);
+ AKLOGI("Bigram: InsertAt -> %d MAX_RESULTS: %d", insertAt, MAX_RESULTS);
}
- if (insertAt < MAX_PREDICTIONS) {
- memmove(bigramFreq + (insertAt + 1),
- bigramFreq + insertAt,
- (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
- bigramFreq[insertAt] = frequency;
- outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
- memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH,
- bigramChars + insertAt * MAX_WORD_LENGTH,
- (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH);
- unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH;
- while (length--) {
- *dest++ = *word++;
- }
- *dest = 0; // NULL terminate
- if (DEBUG_DICT) {
- AKLOGI("Bigram: Added word at %d", insertAt);
- }
- return true;
+ if (insertAt >= MAX_RESULTS) {
+ return;
+ }
+ memmove(bigramFreq + (insertAt + 1),
+ bigramFreq + insertAt,
+ (MAX_RESULTS - insertAt - 1) * sizeof(bigramFreq[0]));
+ bigramFreq[insertAt] = frequency;
+ outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
+ memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
+ bigramCodePoints + insertAt * MAX_WORD_LENGTH,
+ (MAX_RESULTS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
+ int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
+ while (length--) {
+ *dest++ = *word++;
+ }
+ *dest = 0; // NULL terminate
+ if (DEBUG_DICT) {
+ AKLOGI("Bigram: Added word at %d", insertAt);
}
- return false;
}
/* Parameters :
* prevWord: the word before, the one for which we need to look up bigrams.
* prevWordLength: its length.
- * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
- * codesSize: the size of the codes array.
- * bigramChars: an array for output, at the same format as outwords for getSuggestions.
+ * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
+ * inputSize: the size of the inputCodePoints array.
+ * bigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
* bigramFreq: an array to output frequencies.
* outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility.
@@ -97,12 +97,12 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter.
*/
-int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes,
- int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const {
+int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints,
+ int inputSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
- const uint8_t *const root = DICT;
+ const uint8_t *const root = DICT_ROOT;
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
@@ -117,15 +117,15 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
int bigramCount = 0;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- uint16_t bigramBuffer[MAX_WORD_LENGTH];
+ int bigramBuffer[MAX_WORD_LENGTH];
int unigramFreq = 0;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
bigramBuffer, &unigramFreq);
- // codesSize == 0 means we are trying to find bigram predictions.
- if (codesSize < 1 || checkFirstCharacter(bigramBuffer, inputCodes)) {
+ // inputSize == 0 means we are trying to find bigram predictions.
+ if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
// Due to space constraints, the frequency for bigrams is approximate - the lower the
// unigram frequency, the worse the precision. The theoritical maximum error in
@@ -134,21 +134,20 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
// here, but it can't get too bad.
const int frequency =
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
- if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars,
- outputTypes)) {
- ++bigramCount;
- }
+ addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
+ outputTypes);
+ ++bigramCount;
}
} while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
- return bigramCount;
+ return min(bigramCount, MAX_RESULTS);
}
// Returns a pointer to the start of the bigram list.
// If the word is not found or has no bigrams, this function returns 0.
-int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
- const int prevWordLength, const bool forceLowerCaseSearch) const {
+int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
+ const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return 0;
- const uint8_t *const root = DICT;
+ const uint8_t *const root = DICT_ROOT;
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
forceLowerCaseSearch);
@@ -166,10 +165,10 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
return pos;
}
-void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord,
+void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevWord,
const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const {
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
- const uint8_t *const root = DICT;
+ const uint8_t *const root = DICT_ROOT;
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
false /* forceLowerCaseSearch */);
if (0 == pos) {
@@ -190,25 +189,25 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
}
-bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
+bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
// Checks whether this word starts with same character or neighboring characters of
// what user typed.
int maxAlt = MAX_ALTERNATIVES;
- const unsigned short firstBaseChar = toBaseLowerCase(*word);
+ const int firstBaseLowerCodePoint = toBaseLowerCase(*word);
while (maxAlt > 0) {
- if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
+ if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
return true;
}
- inputCodes++;
+ inputCodePoints++;
maxAlt--;
}
return false;
}
-bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
+bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2,
int length2) const {
- const uint8_t *const root = DICT;
+ const uint8_t *const root = DICT_ROOT;
int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 5f11ae822..2ce6c1d0d 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -26,29 +26,22 @@ namespace latinime {
class BigramDictionary {
public:
- BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
- int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
- unsigned short *outWords, int *frequencies, int *outputTypes) const;
- void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
+ BigramDictionary(const uint8_t *const streamStart);
+ int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
+ int *frequencies, int *outputTypes) const;
+ void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter) const;
- bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
+ bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
- bool addWordBigram(unsigned short *word, int length, int frequency,
- int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const;
- int getBigramAddress(int *pos, bool advance);
- int getBigramFreq(int *pos);
- void searchForTerminalNode(int addressLookingFor, int frequency);
- bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
- bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
- bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
- int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
+ void addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
+ int *outputTypes) const;
+ bool checkFirstCharacter(int *word, int *inputCodePoints) const;
+ int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
- const unsigned char *DICT;
- const int MAX_WORD_LENGTH;
- const int MAX_PREDICTIONS;
+ const uint8_t *const DICT_ROOT;
// TODO: Re-implement proximity correction for bigram correction
static const int MAX_ALTERNATIVES = 1;
};
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index eec52e323..2d7c4b492 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -17,8 +17,10 @@
#ifndef LATINIME_BINARY_FORMAT_H
#define LATINIME_BINARY_FORMAT_H
-#include <limits>
+#include <cstdlib>
#include <map>
+#include <stdint.h>
+
#include "bloom_filter.h"
#include "char_utils.h"
@@ -28,10 +30,6 @@ class BinaryFormat {
public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
- static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
- static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
- static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
- static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
// Flag for single/multiple char group
static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
@@ -61,36 +59,24 @@ class BinaryFormat {
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
- const static int UNKNOWN_FORMAT = -1;
- // Originally, format version 1 had a 16-bit magic number, then the version number `01'
- // then options that must be 0. Hence the first 32-bits of the format are always as follow
- // and it's okay to consider them a magic number as a whole.
- const static uint32_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
- const static unsigned int FORMAT_VERSION_1_HEADER_SIZE = 5;
- // The versions of Latin IME that only handle format version 1 only test for the magic
- // number, so we had to change it so that version 2 files would be rejected by older
- // implementations. On this occasion, we made the magic number 32 bits long.
- const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
-
- const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
- const static int SHORTCUT_LIST_SIZE_SIZE = 2;
+ static const int UNKNOWN_FORMAT = -1;
+ static const int SHORTCUT_LIST_SIZE_SIZE = 2;
static int detectFormat(const uint8_t *const dict);
- static unsigned int getHeaderSize(const uint8_t *const dict);
- static unsigned int getFlags(const uint8_t *const dict);
+ static int getHeaderSize(const uint8_t *const dict);
+ static int getFlags(const uint8_t *const dict);
+ static void readHeaderValue(const uint8_t *const dict, const char *const key, int *outValue,
+ const int outValueSize);
+ static int readHeaderValueInt(const uint8_t *const dict, const char *const key);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
- static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
+ static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos);
static int skipFrequency(const uint8_t flags, const int pos);
static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
- static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
const int pos);
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
@@ -98,10 +84,10 @@ class BinaryFormat {
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
static int getAttributeFrequencyFromFlags(const int flags);
- static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
+ static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
- uint16_t *outWord, int *outUnigramFrequency);
+ int *outWord, int *outUnigramFrequency);
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@@ -113,19 +99,39 @@ class BinaryFormat {
REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1,
REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4
};
- const static unsigned int NO_FLAGS = 0;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
- const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
- const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
- const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+ static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
+ static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
+ static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
+ static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+
+ // Originally, format version 1 had a 16-bit magic number, then the version number `01'
+ // then options that must be 0. Hence the first 32 bits of the format are always as follows
+ // and it's okay to consider them a magic number as a whole.
+ static const int FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
+ static const int FORMAT_VERSION_1_HEADER_SIZE = 5;
+ // The versions of Latin IME that only handle format version 1 only test for the magic
+ // number, so we had to change it so that version 2 files would be rejected by older
+ // implementations. On this occasion, we made the magic number 32 bits long.
+ static const int FORMAT_VERSION_2_MAGIC_NUMBER = -1681835266; // 0x9BC13AFE
+
+ static const int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
+ static const int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+ static const int CHARACTER_ARRAY_TERMINATOR = 0x1F;
+ static const int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+ static const int NO_FLAGS = 0;
static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
+ static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
};
-inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
+AK_FORCE_INLINE int BinaryFormat::detectFormat(const uint8_t *const dict) {
// The magic number is stored big-endian.
- const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3];
+ const int magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3];
switch (magicNumber) {
case FORMAT_VERSION_1_MAGIC_NUMBER:
// Format 1 header is exactly 5 bytes long and looks like:
@@ -145,16 +151,16 @@ inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
}
}
-inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) {
+inline int BinaryFormat::getFlags(const uint8_t *const dict) {
switch (detectFormat(dict)) {
case 1:
- return NO_FLAGS;
+ return NO_FLAGS; // TODO: NO_FLAGS is unused anywhere else?
default:
return (dict[6] << 8) + dict[7];
}
}
-inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
+inline int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
switch (detectFormat(dict)) {
case 1:
return FORMAT_VERSION_1_HEADER_SIZE;
@@ -162,11 +168,73 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
// See the format of the header in the comment in detectFormat() above
return (dict[8] << 24) + (dict[9] << 16) + (dict[10] << 8) + dict[11];
default:
- return std::numeric_limits<unsigned int>::max();
+ return S_INT_MAX;
}
}
-inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) {
+// Looks up `key` among the header's {key,value} string pairs and copies the matching value,
+// as code points, into outValue.
+//   dict:         start of the dictionary buffer, header included.
+//   key:          0-terminated key to look for; compared code point by code point.
+//   outValue:     receives the value followed by a terminating 0 (provided outValueSize > 0).
+//                 Only the terminating 0 is written when the key is not found or when the
+//                 format is older than version 2.
+//   outValueSize: capacity of outValue in ints; a longer value is truncated so that the
+//                 terminating 0 still fits.
+inline void BinaryFormat::readHeaderValue(const uint8_t *const dict, const char *const key,
+        int *outValue, const int outValueSize) {
+    int outValueIndex = 0;
+    // Only format 2 and above have header attributes as {key,value} string pairs. For prior
+    // formats, we just return an empty string, as if the key wasn't found.
+    if (2 <= detectFormat(dict)) {
+        const int headerOptionsOffset = 4 /* magic number */
+                + 2 /* dictionary version */ + 2 /* flags */;
+        const int headerSize =
+                (dict[headerOptionsOffset] << 24) + (dict[headerOptionsOffset + 1] << 16)
+                + (dict[headerOptionsOffset + 2] << 8) + dict[headerOptionsOffset + 3];
+        const int headerEnd = headerOptionsOffset + 4 + headerSize;
+        int index = headerOptionsOffset + 4;
+        while (index < headerEnd) {
+            int keyIndex = 0;
+            int codePoint = getCodePointAndForwardPointer(dict, &index);
+            while (codePoint != NOT_A_CODE_POINT) {
+                // Stop at the end of the requested key before comparing: a 0 code point in a
+                // malformed header must not match the key's terminator, which would push
+                // keyIndex past the end of the key string.
+                if (key[keyIndex] == 0 || codePoint != key[keyIndex]) {
+                    break;
+                }
+                ++keyIndex;
+                codePoint = getCodePointAndForwardPointer(dict, &index);
+            }
+            if (codePoint == NOT_A_CODE_POINT && key[keyIndex] == 0) {
+                // We found the key! Copy and return the value.
+                codePoint = getCodePointAndForwardPointer(dict, &index);
+                while (codePoint != NOT_A_CODE_POINT && outValueIndex < outValueSize) {
+                    outValue[outValueIndex++] = codePoint;
+                    codePoint = getCodePointAndForwardPointer(dict, &index);
+                }
+                // Finished copying. Break to go to the termination code.
+                break;
+            }
+            // We didn't find the key, skip the remainder of it and its value
+            while (codePoint != NOT_A_CODE_POINT) {
+                codePoint = getCodePointAndForwardPointer(dict, &index);
+            }
+            codePoint = getCodePointAndForwardPointer(dict, &index);
+            while (codePoint != NOT_A_CODE_POINT) {
+                codePoint = getCodePointAndForwardPointer(dict, &index);
+            }
+        }
+        // We couldn't find it - fall through and return an empty value.
+    }
+    // Put a terminator 0 if possible at all (always unless outValueSize is <= 0)
+    if (outValueIndex >= outValueSize) outValueIndex = outValueSize - 1;
+    if (outValueIndex >= 0) outValue[outValueIndex] = 0;
+}
+
+// Reads the header value stored under `key` and parses it as a non-negative decimal integer.
+// Returns S_INT_MIN if the value does not start with an ASCII digit; this includes the empty
+// value that readHeaderValue() produces when the key is missing.
+inline int BinaryFormat::readHeaderValueInt(const uint8_t *const dict, const char *const key) {
+    const int bufferSize = LARGEST_INT_DIGIT_COUNT;
+    int intBuffer[bufferSize];
+    char charBuffer[bufferSize];
+    BinaryFormat::readHeaderValue(dict, key, intBuffer, bufferSize);
+    // Copy only the leading run of ASCII digits. The check is done on the code points
+    // themselves, before narrowing to char, so that a non-ASCII code point whose low byte
+    // happens to be a digit is not mistaken for one. The loop stops at the 0 terminator
+    // written by readHeaderValue(), so entries after it (uninitialized) are never read.
+    int length = 0;
+    while (length < bufferSize - 1 && intBuffer[length] >= '0' && intBuffer[length] <= '9') {
+        charBuffer[length] = static_cast<char>(intBuffer[length]);
+        ++length;
+    }
+    charBuffer[length] = 0;
+    // If not a number, return S_INT_MIN
+    if (0 == length) return S_INT_MIN;
+    return atoi(charBuffer);
+}
+
+AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
+ int *pos) {
const int msb = dict[(*pos)++];
if (msb < 0x80) return msb;
return ((msb & 0x7F) << 8) | dict[(*pos)++];
@@ -176,17 +244,18 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
return dict[(*pos)++];
}
-inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
+AK_FORCE_INLINE int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict,
+ int *pos) {
const int origin = *pos;
- const int32_t codePoint = dict[origin];
+ const int codePoint = dict[origin];
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1;
return NOT_A_CODE_POINT;
} else {
*pos = origin + 3;
- const int32_t char_1 = codePoint << 16;
- const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
+ const int char_1 = codePoint << 16;
+ const int char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2];
}
} else {
@@ -200,9 +269,9 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const
return dict[pos];
}
-inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
+AK_FORCE_INLINE int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
int currentPos = pos;
- int32_t character = dict[currentPos++];
+ int character = dict[currentPos++];
while (CHARACTER_ARRAY_TERMINATOR != character) {
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
@@ -226,7 +295,7 @@ static inline int attributeAddressSize(const uint8_t flags) {
*/
}
-static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) {
+static AK_FORCE_INLINE int skipExistingBigrams(const uint8_t *const dict, const int pos) {
int currentPos = pos;
uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) {
@@ -243,8 +312,8 @@ static inline int childrenAddressSize(const uint8_t flags) {
/* See the note in attributeAddressSize. The same applies here */
}
-static inline int shortcutByteSize(const uint8_t *const dict, const int pos) {
- return ((int)(dict[pos] << 8)) + (dict[pos + 1]);
+static AK_FORCE_INLINE int shortcutByteSize(const uint8_t *const dict, const int pos) {
+ return (static_cast<int>(dict[pos] << 8)) + (dict[pos + 1]);
}
inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos) {
@@ -255,7 +324,7 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
}
-inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
+AK_FORCE_INLINE int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
const int pos) {
if (FLAG_HAS_SHORTCUT_TARGETS & flags) {
return pos + shortcutByteSize(dict, pos);
@@ -264,7 +333,7 @@ inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t
}
}
-inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
+AK_FORCE_INLINE int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
const int pos) {
if (FLAG_HAS_BIGRAMS & flags) {
return skipExistingBigrams(dict, pos);
@@ -273,7 +342,7 @@ inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t fl
}
}
-inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
+AK_FORCE_INLINE int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
const int pos) {
// This function skips all attributes: shortcuts and bigrams.
int newPos = pos;
@@ -282,7 +351,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint
return newPos;
}
-inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
+AK_FORCE_INLINE int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
const uint8_t flags, const int pos) {
int currentPos = pos;
currentPos = skipChildrenPosition(flags, currentPos);
@@ -290,8 +359,8 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
return currentPos;
}
-inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags,
- const int pos) {
+AK_FORCE_INLINE int BinaryFormat::readChildrenPosition(const uint8_t *const dict,
+ const uint8_t flags, const int pos) {
int offset = 0;
switch (MASK_GROUP_ADDRESS_TYPE & flags) {
case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
@@ -318,7 +387,7 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
}
-inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
+AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
const uint8_t flags, int *pos) {
int offset = 0;
const int origin = *pos;
@@ -352,8 +421,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
-inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
- const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
+AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
+ const int *const inWord, const int length, const bool forceLowerCaseSearch) {
int pos = 0;
int wordPos = 0;
@@ -362,14 +431,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
// there was no match (or we would have found it).
if (wordPos >= length) return NOT_VALID_WORD;
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
- const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
+ const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) {
// If there are no more character groups in this node, it means we could not
// find a matching character for this depth, therefore there is no match.
if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) {
// This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is
@@ -438,8 +507,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
* outUnigramFrequency: a pointer to an int to write the frequency into.
* Return value : the length of the word, of 0 if the word was not found.
*/
-inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
- const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
+AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
+ const int maxDepth, int *outWord, int *outUnigramFrequency) {
int pos = 0;
int wordPos = 0;
@@ -457,13 +526,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
--charGroupCount) {
const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
- const int32_t character = getCodePointAndForwardPointer(root, &pos);
+ const int character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return
// the length.
outWord[wordPos] = character;
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
- int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
+ int nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug
int charCount = maxDepth;
@@ -522,13 +591,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
if (0 != lastCandidateGroupPos) {
const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
- const int32_t lastChar =
+ const int lastChar =
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar;
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
- int32_t nextChar =
- getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
+ int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
@@ -595,9 +663,8 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int,
if (bigramFreqIt != bigramMap->end()) {
const int bigramFreq = bigramFreqIt->second;
return computeFrequencyForBigram(unigramFreq, bigramFreq);
- } else {
- return backoff(unigramFreq);
}
+ return backoff(unigramFreq);
}
} // namespace latinime
#endif // LATINIME_BINARY_FORMAT_H
diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp
index d0547a982..8d917ea74 100644
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/char_utils.cpp
@@ -17,6 +17,7 @@
#include <cstdlib>
#include "char_utils.h"
+#include "defines.h"
namespace latinime {
@@ -25,78 +26,62 @@ struct LatinCapitalSmallPair {
unsigned short small;
};
-// Generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt
-//
-// 1. Run the following code. Bascially taken from
-// Dictionary::toLowerCase(unsigned short c) in dictionary.cpp.
-// Then, get the list of chars where cc != ccc.
-//
-// unsigned short c, cc, ccc, ccc2;
-// for (c = 0; c < 0xFFFF ; c++) {
-// if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
-// cc = BASE_CHARS[c];
-// } else {
-// cc = c;
-// }
-//
-// // tolower
-// int isBase = 0;
-// if (cc >='A' && cc <= 'Z') {
-// ccc = (cc | 0x20);
-// ccc2 = ccc;
-// isBase = 1;
-// } else if (cc > 0x7F) {
-// ccc = u_tolower(cc);
-// ccc2 = latin_tolower(cc);
-// } else {
-// ccc = cc;
-// ccc2 = ccc;
-// }
-// if (!isBase && cc != ccc) {
-// wprintf(L" 0x%04X => 0x%04X => 0x%04X %lc => %lc => %lc \n",
-// c, cc, ccc, c, cc, ccc);
-// //assert(ccc == ccc2);
-// }
-// }
-//
-// Initially, started with an empty latin_tolower() as below.
-//
-// unsigned short latin_tolower(unsigned short c) {
-// return c;
-// }
-//
-//
-// 2. Process the list obtained by 1 by the following perl script and apply
-// 'sort -u' as well. Get the SORTED_CHAR_MAP[].
-// Note that '$1' in the perl script is 'cc' in the above C code.
-//
-// while(<>) {
-// / 0x\w* => 0x(\w*) =/;
-// open(HDL, "grep -iw ^" . $1 . " UnicodeData.txt | ");
-// $line = <HDL>;
-// chomp $line;
-// @cols = split(/;/, $line);
-// print " { 0x$1, 0x$cols[13] }, // $cols[1]\n";
-// }
-//
-//
-// 3. Update the latin_tolower() function above with SORTED_CHAR_MAP. Enable
-// the assert(ccc == ccc2) above and confirm the function exits successfully.
-//
+/*
+ * How to update the SORTED_CHAR_MAP[] array.
+ *
+ * 1. Download http://unicode.org/Public/UNIDATA/UnicodeData.txt
+ *
+ * 2. Have the latest version of the ICU4C dev package installed
+ * (Note: the current data has been generated with version 4.8)
+ * $ apt-get install libicu-dev
+ *
+ * 3. Build the following code
+ * (You need this file, char_utils.h, and defines.h)
+ * $ g++ -o char_utils -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc
+ */
+#ifdef UPDATING_CHAR_UTILS
+#include <stdio.h>
+#include <unicode/uchar.h> // ICU4C
+
+// Maintenance tool, built only with -DUPDATING_CHAR_UTILS.
+// For every 16-bit value below 0xFFFF, takes its base character (through BASE_CHARS when the
+// value is inside that table) and, for non-ASCII base characters that ICU lower-cases to
+// something different:
+//  - without CONFIRMING_CHAR_UTILS: prints the "capital, small" pair on stdout;
+//  - with CONFIRMING_CHAR_UTILS: reports on stderr each pair for which latin_tolower()
+//    disagrees with ICU, and makes the process exit with a non-zero status in that case.
+extern "C" int main() {
+    int exitStatus = 0;
+    for (unsigned short c = 0; c < 0xFFFF; c++) {
+        const unsigned short baseC = c < NELEMS(BASE_CHARS) ? BASE_CHARS[c] : c;
+        if (baseC <= 0x7F) continue;
+        const unsigned short icu4cLowerBaseC = u_tolower(baseC);
+        // Nothing to record or to check when ICU leaves the character unchanged.
+        if (baseC == icu4cLowerBaseC) continue;
+#ifdef CONFIRMING_CHAR_UTILS
+        const unsigned short myLowerBaseC = latin_tolower(baseC);
+        if (icu4cLowerBaseC != myLowerBaseC) {
+            fprintf(stderr, "icu4cLowerBaseC != myLowerBaseC, 0x%04X, 0x%04X\n",
+                    icu4cLowerBaseC, myLowerBaseC);
+            // Make the mismatch visible in the exit status, not only on stderr.
+            exitStatus = 1;
+        }
+#else // CONFIRMING_CHAR_UTILS
+        printf("0x%04X, 0x%04X\n", baseC, icu4cLowerBaseC);
+#endif // CONFIRMING_CHAR_UTILS
+    }
+    return exitStatus;
+}
+#endif // UPDATING_CHAR_UTILS
+/*
+ * 4. Process the list with UnicodeData.txt
+ * (You need UnicodeData.txt in the current directory)
+ * $ ./char_utils | sort -u | \
+ * perl -e 'open(FH, "UnicodeData.txt"); @buf = <FH>; close(FH); \
+ * while(<>){/0x(\w*), 0x(\w*)/; @lines = grep(/^$1/, @buf); @cols = split(/;/, $lines[0]); \
+ * print " { 0x$1, 0x$cols[13] }, // $cols[1]\n";}'
+ *
+ * 5. Update the SORTED_CHAR_MAP[] array below with the output above.
+ * Then, rebuild with -DCONFIRMING_CHAR_UTILS and confirm the program exits successfully.
+ * $ g++ -o char_utils -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp -licuuc
+ * $ ./char_utils
+ * $
+ */
static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
- { 0x00C4, 0x00E4 }, // LATIN CAPITAL LETTER A WITH DIAERESIS
- { 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
- { 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
- { 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
- { 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
- { 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
- { 0x00DC, 0x00FC }, // LATIN CAPITAL LETTER U WITH DIAERESIS
{ 0x00DE, 0x00FE }, // LATIN CAPITAL LETTER THORN
{ 0x0110, 0x0111 }, // LATIN CAPITAL LETTER D WITH STROKE
{ 0x0126, 0x0127 }, // LATIN CAPITAL LETTER H WITH STROKE
- { 0x0141, 0x0142 }, // LATIN CAPITAL LETTER L WITH STROKE
{ 0x014A, 0x014B }, // LATIN CAPITAL LETTER ENG
{ 0x0152, 0x0153 }, // LATIN CAPITAL LIGATURE OE
{ 0x0166, 0x0167 }, // LATIN CAPITAL LETTER T WITH STROKE
@@ -135,15 +120,12 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x01B8, 0x01B9 }, // LATIN CAPITAL LETTER EZH REVERSED
{ 0x01BC, 0x01BD }, // LATIN CAPITAL LETTER TONE FIVE
{ 0x01E4, 0x01E5 }, // LATIN CAPITAL LETTER G WITH STROKE
- { 0x01EA, 0x01EB }, // LATIN CAPITAL LETTER O WITH OGONEK
{ 0x01F6, 0x0195 }, // LATIN CAPITAL LETTER HWAIR
{ 0x01F7, 0x01BF }, // LATIN CAPITAL LETTER WYNN
{ 0x021C, 0x021D }, // LATIN CAPITAL LETTER YOGH
{ 0x0220, 0x019E }, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
{ 0x0222, 0x0223 }, // LATIN CAPITAL LETTER OU
{ 0x0224, 0x0225 }, // LATIN CAPITAL LETTER Z WITH HOOK
- { 0x0226, 0x0227 }, // LATIN CAPITAL LETTER A WITH DOT ABOVE
- { 0x022E, 0x022F }, // LATIN CAPITAL LETTER O WITH DOT ABOVE
{ 0x023A, 0x2C65 }, // LATIN CAPITAL LETTER A WITH STROKE
{ 0x023B, 0x023C }, // LATIN CAPITAL LETTER C WITH STROKE
{ 0x023D, 0x019A }, // LATIN CAPITAL LETTER L WITH BAR
@@ -320,6 +302,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x0520, 0x0521 }, // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
{ 0x0522, 0x0523 }, // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
{ 0x0524, 0x0525 }, // CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+ { 0x0526, 0x0527 }, // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
{ 0x0531, 0x0561 }, // ARMENIAN CAPITAL LETTER AYB
{ 0x0532, 0x0562 }, // ARMENIAN CAPITAL LETTER BEN
{ 0x0533, 0x0563 }, // ARMENIAN CAPITAL LETTER GIM
@@ -793,6 +776,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0xA65A, 0xA65B }, // CYRILLIC CAPITAL LETTER BLENDED YUS
{ 0xA65C, 0xA65D }, // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
{ 0xA65E, 0xA65F }, // CYRILLIC CAPITAL LETTER YN
+ { 0xA660, 0xA661 }, // CYRILLIC CAPITAL LETTER REVERSED TSE
{ 0xA662, 0xA663 }, // CYRILLIC CAPITAL LETTER SOFT DE
{ 0xA664, 0xA665 }, // CYRILLIC CAPITAL LETTER SOFT EL
{ 0xA666, 0xA667 }, // CYRILLIC CAPITAL LETTER SOFT EM
@@ -858,6 +842,13 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0xA784, 0xA785 }, // LATIN CAPITAL LETTER INSULAR S
{ 0xA786, 0xA787 }, // LATIN CAPITAL LETTER INSULAR T
{ 0xA78B, 0xA78C }, // LATIN CAPITAL LETTER SALTILLO
+ { 0xA78D, 0x0265 }, // LATIN CAPITAL LETTER TURNED H
+ { 0xA790, 0xA791 }, // LATIN CAPITAL LETTER N WITH DESCENDER
+ { 0xA7A0, 0xA7A1 }, // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+ { 0xA7A2, 0xA7A3 }, // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+ { 0xA7A4, 0xA7A5 }, // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+ { 0xA7A6, 0xA7A7 }, // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+ { 0xA7A8, 0xA7A9 }, // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
{ 0xFF21, 0xFF41 }, // FULLWIDTH LATIN CAPITAL LETTER A
{ 0xFF22, 0xFF42 }, // FULLWIDTH LATIN CAPITAL LETTER B
{ 0xFF23, 0xFF43 }, // FULLWIDTH LATIN CAPITAL LETTER C
@@ -894,9 +885,220 @@ static int compare_pair_capital(const void *a, const void *b) {
unsigned short latin_tolower(const unsigned short c) {
struct LatinCapitalSmallPair *p =
static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
- sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]),
- sizeof(SORTED_CHAR_MAP[0]),
- compare_pair_capital));
+ NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
return p ? p->small : c;
}
+
+/*
+ * Table mapping most combined Latin, Greek, and Cyrillic characters
+ * to their base characters. If c is in range, BASE_CHARS[c] == c
+ * if c is not a combined character, or the base character if it
+ * is combined.
+ *
+ * Generated with:
+ * cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; \
+ * $base[hex($foo[0])] = hex($foo[5]);} \
+ * for ($i = 0; $i < 0x500; $i += 8) { printf("/" . "* U+%04X *" . "/ ", $i); \
+ * for ($j = $i; $j < $i + 8; $j++) { \
+ * printf("0x%04X, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
+ */
+const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
+ /* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ /* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+ /* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ /* U+0018 */ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+ /* U+0020 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ /* U+0028 */ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+ /* U+0030 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ /* U+0038 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+ /* U+0040 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ /* U+0048 */ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+ /* U+0050 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ /* U+0058 */ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+ /* U+0060 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ /* U+0068 */ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+ /* U+0070 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ /* U+0078 */ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+ /* U+0080 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ /* U+0088 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ /* U+0090 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ /* U+0098 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ /* U+00A0 */ 0x0020, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ /* U+00A8 */ 0x0020, 0x00A9, 0x0061, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0020,
+ /* U+00B0 */ 0x00B0, 0x00B1, 0x0032, 0x0033, 0x0020, 0x03BC, 0x00B6, 0x00B7,
+ /* U+00B8 */ 0x0020, 0x0031, 0x006F, 0x00BB, 0x0031, 0x0031, 0x0033, 0x00BF,
+ /* U+00C0 */ 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043,
+ /* U+00C8 */ 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
+ /* U+00D0 */ 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7,
+ /* U+00D8 */ 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0073,
+ // U+00D8: Manually changed from 00D8 to 004F
+ // TODO: Check if it's really acceptable to consider Ø a diacritical variant of O
+ // U+00DF: Manually changed from 00DF to 0073
+ /* U+00E0 */ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00E6, 0x0063,
+ /* U+00E8 */ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
+ /* U+00F0 */ 0x00F0, 0x006E, 0x006F, 0x006F, 0x006F, 0x006F, 0x006F, 0x00F7,
+ /* U+00F8 */ 0x006F, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00FE, 0x0079,
+ // U+00F8: Manually changed from 00F8 to 006F
+ // TODO: Check if it's really acceptable to consider ø a diacritical variant of o
+ /* U+0100 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
+ /* U+0108 */ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
+ /* U+0110 */ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+ /* U+0118 */ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
+ /* U+0120 */ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
+ /* U+0128 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
+ /* U+0130 */ 0x0049, 0x0131, 0x0049, 0x0069, 0x004A, 0x006A, 0x004B, 0x006B,
+ /* U+0138 */ 0x0138, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C, 0x006C, 0x004C,
+ /* U+0140 */ 0x006C, 0x004C, 0x006C, 0x004E, 0x006E, 0x004E, 0x006E, 0x004E,
+ // U+0141: Manually changed from 0141 to 004C
+ // U+0142: Manually changed from 0142 to 006C
+ /* U+0148 */ 0x006E, 0x02BC, 0x014A, 0x014B, 0x004F, 0x006F, 0x004F, 0x006F,
+ /* U+0150 */ 0x004F, 0x006F, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
+ /* U+0158 */ 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
+ /* U+0160 */ 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
+ /* U+0168 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
+ /* U+0170 */ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
+ /* U+0178 */ 0x0059, 0x005A, 0x007A, 0x005A, 0x007A, 0x005A, 0x007A, 0x0073,
+ /* U+0180 */ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
+ /* U+0188 */ 0x0188, 0x0189, 0x018A, 0x018B, 0x018C, 0x018D, 0x018E, 0x018F,
+ /* U+0190 */ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
+ /* U+0198 */ 0x0198, 0x0199, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
+ /* U+01A0 */ 0x004F, 0x006F, 0x01A2, 0x01A3, 0x01A4, 0x01A5, 0x01A6, 0x01A7,
+ /* U+01A8 */ 0x01A8, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AD, 0x01AE, 0x0055,
+ /* U+01B0 */ 0x0075, 0x01B1, 0x01B2, 0x01B3, 0x01B4, 0x01B5, 0x01B6, 0x01B7,
+ /* U+01B8 */ 0x01B8, 0x01B9, 0x01BA, 0x01BB, 0x01BC, 0x01BD, 0x01BE, 0x01BF,
+ /* U+01C0 */ 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x0044, 0x0044, 0x0064, 0x004C,
+ /* U+01C8 */ 0x004C, 0x006C, 0x004E, 0x004E, 0x006E, 0x0041, 0x0061, 0x0049,
+ /* U+01D0 */ 0x0069, 0x004F, 0x006F, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055,
+ // U+01D5: Manually changed from 00DC to 0055
+ // U+01D6: Manually changed from 00FC to 0075
+ // U+01D7: Manually changed from 00DC to 0055
+ /* U+01D8 */ 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x01DD, 0x0041, 0x0061,
+ // U+01D8: Manually changed from 00FC to 0075
+ // U+01D9: Manually changed from 00DC to 0055
+ // U+01DA: Manually changed from 00FC to 0075
+ // U+01DB: Manually changed from 00DC to 0055
+ // U+01DC: Manually changed from 00FC to 0075
+ // U+01DE: Manually changed from 00C4 to 0041
+ // U+01DF: Manually changed from 00E4 to 0061
+ /* U+01E0 */ 0x0041, 0x0061, 0x00C6, 0x00E6, 0x01E4, 0x01E5, 0x0047, 0x0067,
+ // U+01E0: Manually changed from 0226 to 0041
+ // U+01E1: Manually changed from 0227 to 0061
+ /* U+01E8 */ 0x004B, 0x006B, 0x004F, 0x006F, 0x004F, 0x006F, 0x01B7, 0x0292,
+ // U+01EC: Manually changed from 01EA to 004F
+ // U+01ED: Manually changed from 01EB to 006F
+ /* U+01F0 */ 0x006A, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01F6, 0x01F7,
+ /* U+01F8 */ 0x004E, 0x006E, 0x0041, 0x0061, 0x00C6, 0x00E6, 0x004F, 0x006F,
+ // U+01FA: Manually changed from 00C5 to 0041
+ // U+01FB: Manually changed from 00E5 to 0061
+ // U+01FE: Manually changed from 00D8 to 004F
+ // TODO: Check if it's really acceptable to consider Ø a diacritical variant of O
+ // U+01FF: Manually changed from 00F8 to 006F
+ // TODO: Check if it's really acceptable to consider ø a diacritical variant of o
+ /* U+0200 */ 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
+ /* U+0208 */ 0x0049, 0x0069, 0x0049, 0x0069, 0x004F, 0x006F, 0x004F, 0x006F,
+ /* U+0210 */ 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
+ /* U+0218 */ 0x0053, 0x0073, 0x0054, 0x0074, 0x021C, 0x021D, 0x0048, 0x0068,
+ /* U+0220 */ 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
+ /* U+0228 */ 0x0045, 0x0065, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F,
+ // U+022A: Manually changed from 00D6 to 004F
+ // U+022B: Manually changed from 00F6 to 006F
+ // U+022C: Manually changed from 00D5 to 004F
+ // U+022D: Manually changed from 00F5 to 006F
+ /* U+0230 */ 0x004F, 0x006F, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
+ // U+0230: Manually changed from 022E to 004F
+ // U+0231: Manually changed from 022F to 006F
+ /* U+0238 */ 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
+ /* U+0240 */ 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
+ /* U+0248 */ 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
+ /* U+0250 */ 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
+ /* U+0258 */ 0x0258, 0x0259, 0x025A, 0x025B, 0x025C, 0x025D, 0x025E, 0x025F,
+ /* U+0260 */ 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
+ /* U+0268 */ 0x0268, 0x0269, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x026F,
+ /* U+0270 */ 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
+ /* U+0278 */ 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
+ /* U+0280 */ 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
+ /* U+0288 */ 0x0288, 0x0289, 0x028A, 0x028B, 0x028C, 0x028D, 0x028E, 0x028F,
+ /* U+0290 */ 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
+ /* U+0298 */ 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
+ /* U+02A0 */ 0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7,
+ /* U+02A8 */ 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
+ /* U+02B0 */ 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077,
+ /* U+02B8 */ 0x0079, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
+ /* U+02C0 */ 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7,
+ /* U+02C8 */ 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
+ /* U+02D0 */ 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7,
+ /* U+02D8 */ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02DE, 0x02DF,
+ /* U+02E0 */ 0x0263, 0x006C, 0x0073, 0x0078, 0x0295, 0x02E5, 0x02E6, 0x02E7,
+ /* U+02E8 */ 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
+ /* U+02F0 */ 0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7,
+ /* U+02F8 */ 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF,
+ /* U+0300 */ 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
+ /* U+0308 */ 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
+ /* U+0310 */ 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
+ /* U+0318 */ 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
+ /* U+0320 */ 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
+ /* U+0328 */ 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
+ /* U+0330 */ 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
+ /* U+0338 */ 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
+ /* U+0340 */ 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
+ /* U+0348 */ 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
+ /* U+0350 */ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
+ /* U+0358 */ 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
+ /* U+0360 */ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
+ /* U+0368 */ 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
+ /* U+0370 */ 0x0370, 0x0371, 0x0372, 0x0373, 0x02B9, 0x0375, 0x0376, 0x0377,
+ /* U+0378 */ 0x0378, 0x0379, 0x0020, 0x037B, 0x037C, 0x037D, 0x003B, 0x037F,
+ /* U+0380 */ 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00A8, 0x0391, 0x00B7,
+ /* U+0388 */ 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
+ /* U+0390 */ 0x03CA, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ /* U+0398 */ 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
+ /* U+03A0 */ 0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+ /* U+03A8 */ 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x03B1, 0x03B5, 0x03B7, 0x03B9,
+ /* U+03B0 */ 0x03CB, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
+ /* U+03B8 */ 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
+ /* U+03C0 */ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
+ /* U+03C8 */ 0x03C8, 0x03C9, 0x03B9, 0x03C5, 0x03BF, 0x03C5, 0x03C9, 0x03CF,
+ /* U+03D0 */ 0x03B2, 0x03B8, 0x03A5, 0x03D2, 0x03D2, 0x03C6, 0x03C0, 0x03D7,
+ /* U+03D8 */ 0x03D8, 0x03D9, 0x03DA, 0x03DB, 0x03DC, 0x03DD, 0x03DE, 0x03DF,
+ /* U+03E0 */ 0x03E0, 0x03E1, 0x03E2, 0x03E3, 0x03E4, 0x03E5, 0x03E6, 0x03E7,
+ /* U+03E8 */ 0x03E8, 0x03E9, 0x03EA, 0x03EB, 0x03EC, 0x03ED, 0x03EE, 0x03EF,
+ /* U+03F0 */ 0x03BA, 0x03C1, 0x03C2, 0x03F3, 0x0398, 0x03B5, 0x03F6, 0x03F7,
+ /* U+03F8 */ 0x03F8, 0x03A3, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF,
+ /* U+0400 */ 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
+ /* U+0408 */ 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
+ /* U+0410 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ /* U+0418 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ // U+0419: Manually changed from 0418 to 0419
+ /* U+0420 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ /* U+0428 */ 0x0428, 0x0429, 0x042C, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ // U+042A: Manually changed from 042A to 042C
+ /* U+0430 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ /* U+0438 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ // U+0439: Manually changed from 0438 to 0439
+ /* U+0440 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ /* U+0448 */ 0x0448, 0x0449, 0x044C, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ // U+044A: Manually changed from 044A to 044C
+ /* U+0450 */ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
+ /* U+0458 */ 0x0458, 0x0459, 0x045A, 0x045B, 0x043A, 0x0438, 0x0443, 0x045F,
+ /* U+0460 */ 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
+ /* U+0468 */ 0x0468, 0x0469, 0x046A, 0x046B, 0x046C, 0x046D, 0x046E, 0x046F,
+ /* U+0470 */ 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
+ /* U+0478 */ 0x0478, 0x0479, 0x047A, 0x047B, 0x047C, 0x047D, 0x047E, 0x047F,
+ /* U+0480 */ 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
+ /* U+0488 */ 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048D, 0x048E, 0x048F,
+ /* U+0490 */ 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
+ /* U+0498 */ 0x0498, 0x0499, 0x049A, 0x049B, 0x049C, 0x049D, 0x049E, 0x049F,
+ /* U+04A0 */ 0x04A0, 0x04A1, 0x04A2, 0x04A3, 0x04A4, 0x04A5, 0x04A6, 0x04A7,
+ /* U+04A8 */ 0x04A8, 0x04A9, 0x04AA, 0x04AB, 0x04AC, 0x04AD, 0x04AE, 0x04AF,
+ /* U+04B0 */ 0x04B0, 0x04B1, 0x04B2, 0x04B3, 0x04B4, 0x04B5, 0x04B6, 0x04B7,
+ /* U+04B8 */ 0x04B8, 0x04B9, 0x04BA, 0x04BB, 0x04BC, 0x04BD, 0x04BE, 0x04BF,
+ /* U+04C0 */ 0x04C0, 0x0416, 0x0436, 0x04C3, 0x04C4, 0x04C5, 0x04C6, 0x04C7,
+ /* U+04C8 */ 0x04C8, 0x04C9, 0x04CA, 0x04CB, 0x04CC, 0x04CD, 0x04CE, 0x04CF,
+ /* U+04D0 */ 0x0410, 0x0430, 0x0410, 0x0430, 0x04D4, 0x04D5, 0x0415, 0x0435,
+ /* U+04D8 */ 0x04D8, 0x04D9, 0x04D8, 0x04D9, 0x0416, 0x0436, 0x0417, 0x0437,
+ /* U+04E0 */ 0x04E0, 0x04E1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041E, 0x043E,
+ /* U+04E8 */ 0x04E8, 0x04E9, 0x04E8, 0x04E9, 0x042D, 0x044D, 0x0423, 0x0443,
+ /* U+04F0 */ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04F6, 0x04F7,
+ /* U+04F8 */ 0x042B, 0x044B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF,
+};
} // namespace latinime
diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h
index 9008e364c..7a4384dbb 100644
--- a/native/jni/src/char_utils.h
+++ b/native/jni/src/char_utils.h
@@ -18,22 +18,23 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
-#include <stdint.h>
+
+#include "defines.h"
namespace latinime {
-inline static bool isAsciiUpper(unsigned short c) {
+inline static bool isAsciiUpper(int c) {
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
return (c >= 'A' && c <= 'Z');
}
-inline static unsigned short toAsciiLower(unsigned short c) {
+inline static int toAsciiLower(int c) {
return c - 'A' + 'a';
}
-inline static bool isAscii(unsigned short c) {
- return isascii(static_cast<int>(c)) != 0;
+inline static bool isAscii(int c) {
+ return isascii(c) != 0;
}
unsigned short latin_tolower(const unsigned short c);
@@ -44,33 +45,42 @@ unsigned short latin_tolower(const unsigned short c);
* if c is not a combined character, or the base character if it
* is combined.
*/
-
static const int BASE_CHARS_SIZE = 0x0500;
-extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
+extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
-inline static unsigned short toBaseChar(unsigned short c) {
+// Returns the BASE_CHARS entry for code point c, or c itself when c lies outside
+// the table. The parameter is a signed int, so the lower bound must be checked
+// explicitly: a negative value would otherwise index in front of BASE_CHARS.
+inline static int toBaseCodePoint(int c) {
- if (c < BASE_CHARS_SIZE) {
- return BASE_CHARS[c];
+ if (c >= 0 && c < BASE_CHARS_SIZE) {
+ return static_cast<int>(BASE_CHARS[c]);
}
return c;
}
-inline static unsigned short toLowerCase(const unsigned short c) {
+// Lower-cases a single code point.
+// ASCII input is handled inline; everything else is delegated to latin_tolower(),
+// which takes a 16-bit unsigned short. Values outside [0, 0xFFFF] cannot be
+// represented in that type, so they are returned unchanged instead of being
+// truncated into an unrelated code point.
+AK_FORCE_INLINE static int toLowerCase(const int c) {
if (isAsciiUpper(c)) {
return toAsciiLower(c);
} else if (isAscii(c)) {
return c;
}
- return latin_tolower(c);
+ if (c < 0 || c > 0xFFFF) {
+ return c;
+ }
+ return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
}
-inline static unsigned short toBaseLowerCase(const unsigned short c) {
- return toLowerCase(toBaseChar(c));
+AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
+ return toLowerCase(toBaseCodePoint(c));
}
-inline static bool isSkippableChar(const uint16_t character) {
+inline static bool isSkippableCodePoint(const int codePoint) {
// TODO: Do not hardcode here
- return character == '\'' || character == '-';
+ return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
+}
+
+inline static int getCodePointCount(const int arraySize, const int *const codePoints) {
+ int size = 0;
+ for (; size < arraySize; ++size) {
+ if (codePoints[size] == '\0') {
+ break;
+ }
+ }
+ return size;
}
} // namespace latinime
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 524abe9a1..0ae02d506 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -14,17 +14,15 @@
* limitations under the License.
*/
-#include <cassert>
-#include <cctype>
-#include <cmath>
-#include <cstring>
-
#define LOG_TAG "LatinIME: correction.cpp"
+#include <cmath>
+
#include "char_utils.h"
#include "correction.h"
#include "defines.h"
#include "proximity_info_state.h"
+#include "suggest_utils.h"
namespace latinime {
@@ -35,7 +33,7 @@ class ProximityInfo;
/////////////////////////////
inline static void initEditDistance(int *editDistanceTable) {
- for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
+ for (int i = 0; i <= MAX_WORD_LENGTH; ++i) {
editDistanceTable[i] = i;
}
}
@@ -60,29 +58,6 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
}
}
-inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
- const int inputSize, const unsigned short *output, const int outputLength) {
- // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
- // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
- // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
- // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize].
- int *const current = editDistanceTable + outputLength * (inputSize + 1);
- const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
- const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
- current[0] = outputLength;
- const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
- const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
- for (int i = 1; i <= inputSize; ++i) {
- const uint32_t ci = toBaseLowerCase(input[i - 1]);
- const uint16_t cost = (ci == co) ? 0 : 1;
- current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
- if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
- current[i] = min(current[i], prevprev[i - 2] + 1);
- }
- }
-}
-
inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
const int outputLength, const int inputSize) {
if (DEBUG_EDIT_DISTANCE) {
@@ -91,16 +66,6 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize];
}
-//////////////////////
-// inline functions //
-//////////////////////
-static const char SINGLE_QUOTE = '\'';
-
-inline bool Correction::isSingleQuote(const unsigned short c) {
- const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
- return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
-}
-
////////////////
// Correction //
////////////////
@@ -109,14 +74,13 @@ void Correction::resetCorrection() {
mTotalTraverseCount = 0;
}
-void Correction::initCorrection(const ProximityInfo *pi, const int inputSize,
- const int maxDepth) {
+void Correction::initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth) {
mProximityInfo = pi;
mInputSize = inputSize;
mMaxDepth = maxDepth;
mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2;
// TODO: This is not supposed to be required. Check what's going wrong with
- // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
+ // editDistance[0 ~ MAX_WORD_LENGTH]
initEditDistance(mEditDistanceTable);
}
@@ -148,7 +112,7 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mMaxErrors = maxErrors;
}
-void Correction::checkState() {
+void Correction::checkState() const {
if (DEBUG_DICT) {
int inputCount = 0;
if (mSkipPos >= 0) ++inputCount;
@@ -157,36 +121,25 @@ void Correction::checkState() {
}
}
-bool Correction::sameAsTyped() {
+bool Correction::sameAsTyped() const {
return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
}
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
- const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
+ const int wordCount, const bool isSpaceProximity, const int *word) const {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
wordCount, this, isSpaceProximity, word);
}
-int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
+int Correction::getFinalProbability(const int probability, int **word, int *wordLength) {
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
}
-int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int *wordLength, const int inputSize) {
+int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
+ const int inputSize) {
return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
}
-int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
- int *wordLength, const int inputSize) {
- const int outputIndex = mTerminalOutputIndex;
- const int inputIndex = mTerminalInputIndex;
- *wordLength = outputIndex + 1;
- *word = mWord;
- int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
- inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
- return finalProbability;
-}
-
bool Correction::initProcessState(const int outputIndex) {
if (mCorrectionStates[outputIndex].mChildCount <= 0) {
return false;
@@ -217,8 +170,7 @@ bool Correction::initProcessState(const int outputIndex) {
return true;
}
-int Correction::goDownTree(
- const int parentIndex, const int childCount, const int firstChildPos) {
+int Correction::goDownTree(const int parentIndex, const int childCount, const int firstChildPos) {
mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
mCorrectionStates[mOutputIndex].mChildCount = childCount;
mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
@@ -230,42 +182,6 @@ int Correction::getInputIndex() const {
return mInputIndex;
}
-void Correction::incrementInputIndex() {
- ++mInputIndex;
-}
-
-void Correction::incrementOutputIndex() {
- ++mOutputIndex;
- mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
- mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
- mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
- mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
- mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
-
- mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
- mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
- mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
- mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
- mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
-
- mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
- mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
- mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
-
- mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
-
- mCorrectionStates[mOutputIndex].mMatching = mMatching;
- mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
- mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
- mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
- mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
- mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
-}
-
-void Correction::startToTraverseAllNodes() {
- mNeedsToTraverseAllNodes = true;
-}
-
bool Correction::needsToPrune() const {
// TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
@@ -273,44 +189,15 @@ bool Correction::needsToPrune() const {
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize));
}
-void Correction::addCharToCurrentWord(const int32_t c) {
- mWord[mOutputIndex] = c;
- const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
- calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
- mWord, mOutputIndex + 1);
-}
-
-Correction::CorrectionType Correction::processSkipChar(
- const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
- addCharToCurrentWord(c);
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
- if (mNeedsToTraverseAllNodes && isTerminal) {
- incrementOutputIndex();
- return TRAVERSE_ALL_ON_TERMINAL;
- } else {
- incrementOutputIndex();
- return TRAVERSE_ALL_NOT_ON_TERMINAL;
- }
-}
-
-Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
- // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
- mTerminalInputIndex = mInputIndex;
- mTerminalOutputIndex = mOutputIndex;
- return UNRELATED;
+inline static bool isEquivalentChar(ProximityType type) {
+ return type == MATCH_CHAR;
}
-inline bool isEquivalentChar(ProximityType type) {
- return type == EQUIVALENT_CHAR;
+inline static bool isProximityCharOrEquivalentChar(ProximityType type) {
+ return type == MATCH_CHAR || type == PROXIMITY_CHAR;
}
-inline bool isProximityCharOrEquivalentChar(ProximityType type) {
- return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
-}
-
-Correction::CorrectionType Correction::processCharAndCalcState(
- const int32_t c, const bool isTerminal) {
+Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) {
return processUnrelatedCorrectionType();
@@ -327,14 +214,14 @@ Correction::CorrectionType Correction::processCharAndCalcState(
bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputSize - 1) {
// TODO: Do not check the proximity if EditDistance exceeds the threshold
- const ProximityType matchId = mProximityInfoState.getMatchedProximityId(
+ const ProximityType matchId = mProximityInfoState.getProximityType(
mInputIndex, c, true, &proximityIndex);
if (isEquivalentChar(matchId)) {
mLastCharExceeded = false;
--mExcessiveCount;
mDistances[mOutputIndex] =
mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
- } else if (matchId == NEAR_PROXIMITY_CHAR) {
+ } else if (matchId == PROXIMITY_CHAR) {
mLastCharExceeded = false;
--mExcessiveCount;
++mProximityCount;
@@ -363,7 +250,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
if (DEBUG_DICT) {
// TODO: Enable this assertion.
- //assert(mSkipPos == mOutputIndex - 1);
+ //ASSERT(mSkipPos == mOutputIndex - 1);
}
mSkipPos = mOutputIndex;
}
@@ -381,7 +268,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
bool secondTransposing = false;
if (mTransposedCount % 2 == 1) {
- if (isEquivalentChar(mProximityInfoState.getMatchedProximityId(
+ if (isEquivalentChar(mProximityInfoState.getProximityType(
mInputIndex - 1, c, false))) {
++mTransposedCount;
secondTransposing = true;
@@ -412,16 +299,16 @@ Correction::CorrectionType Correction::processCharAndCalcState(
: (noCorrectionsHappenedSoFar && mProximityCount == 0);
ProximityType matchedProximityCharId = secondTransposing
- ? EQUIVALENT_CHAR
- : mProximityInfoState.getMatchedProximityId(
+ ? MATCH_CHAR
+ : mProximityInfoState.getProximityType(
mInputIndex, c, checkProximityChars, &proximityIndex);
- if (UNRELATED_CHAR == matchedProximityCharId
+ if (SUBSTITUTION_CHAR == matchedProximityCharId
|| ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
if (canTryCorrection && mOutputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mExceeding
- && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
+ && isEquivalentChar(mProximityInfoState.getProximityType(
mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION
&& (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
@@ -440,12 +327,12 @@ Correction::CorrectionType Correction::processCharAndCalcState(
// Here, we are doing something equivalent to matchedProximityCharId,
// but we already know that "excessive char correction" just happened
// so that we just need to check "mProximityCount == 0".
- matchedProximityCharId = mProximityInfoState.getMatchedProximityId(
+ matchedProximityCharId = mProximityInfoState.getProximityType(
mInputIndex, c, mProximityCount == 0, &proximityIndex);
}
}
- if (UNRELATED_CHAR == matchedProximityCharId
+ if (SUBSTITUTION_CHAR == matchedProximityCharId
|| ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
mAdditionalProximityMatching = true;
@@ -457,10 +344,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
- && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
+ && isEquivalentChar(mProximityInfoState.getProximityType(
mInputIndex, mWord[mOutputIndex - 1], false))
&& isEquivalentChar(
- mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) {
// Conversion t->e
// Example:
// occaisional -> occa sional
@@ -472,7 +359,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& !mCorrectionStates[mOutputIndex].mTransposing
&& mCorrectionStates[mOutputIndex - 1].mTransposing
&& isEquivalentChar(
- mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
+ mProximityInfoState.getProximityType(mInputIndex - 1, c, false))) {
// Conversion t->s
// Example:
// chcolate -> chocolate
@@ -484,7 +371,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping
&& isEquivalentChar(
- mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
+ mProximityInfoState.getProximityType(mInputIndex - 1, c, false))) {
// Conversion p->s
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char.
@@ -496,7 +383,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mCorrectionStates[mOutputIndex].mSkipping
&& mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
&& isProximityCharOrEquivalentChar(
- mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) {
// Conversion s->a
incrementInputIndex();
--mSkippedCount;
@@ -505,7 +392,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
} else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize
&& isEquivalentChar(
- mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
+ mProximityInfoState.getProximityType(mInputIndex + 1, c, false))) {
// 1.2. Excessive or transpose correction
if (mTransposing) {
++mTransposedCount;
@@ -568,7 +455,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mMatching = true;
++mEquivalentCharCount;
mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
- } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
+ } else if (PROXIMITY_CHAR == matchedProximityCharId) {
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] =
@@ -628,10 +515,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
}
-inline static int getQuoteCount(const unsigned short *word, const int length) {
+inline static int getQuoteCount(const int *word, const int length) {
int quoteCount = 0;
for (int i = 0; i < length; ++i) {
- if (word[i] == SINGLE_QUOTE) {
+ if (word[i] == KEYCODE_SINGLE_QUOTE) {
++quoteCount;
}
}
@@ -639,15 +526,14 @@ inline static int getQuoteCount(const unsigned short *word, const int length) {
}
-inline static bool isUpperCase(unsigned short c) {
- return isAsciiUpper(toBaseChar(c));
+// True when the base form of code point c is an ASCII upper-case letter.
+// Takes an int, matching isAsciiUpper() and toBaseCodePoint(), so an int code
+// point is not narrowed to 16 bits before it is tested.
+inline static bool isUpperCase(const int c) {
+ return isAsciiUpper(toBaseCodePoint(c));
}
//////////////////////
// RankingAlgorithm //
//////////////////////
-/* static */
-int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
+/* static */ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction,
const int inputSize) {
const int excessivePos = correction->getExcessivePos();
@@ -672,7 +558,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// TODO: use mExcessiveCount
const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
- const unsigned short *word = correction->mWord;
+ const int *word = correction->mWord;
const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
@@ -728,7 +614,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
multiplyIntCapped(matchWeight, &finalFreq);
}
- if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) {
+ if (proximityInfoState->getProximityType(0, word[0], true) == SUBSTITUTION_CHAR) {
multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
}
@@ -788,28 +674,10 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
if (i < adjustedProximityMatchedCount) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
}
- if (squaredDistance >= 0) {
- // Promote or demote the score according to the distance from the sweet spot
- static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
- static const float B = 1.0f;
- static const float C = 0.5f;
- static const float MIN = 0.3f;
- static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
- static const float R2 = HALF_SCORE_SQUARED_RADIUS;
- const float x = static_cast<float>(squaredDistance)
- / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
- const float factor = max((x < R1)
- ? (A * (R1 - x) + B * x) / R1
- : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
- // factor is a piecewise linear function like:
- // A -_ .
- // ^-_ .
- // B \ .
- // \_ .
- // C ------------.
- // .
- // 0 R1 R2 .
- multiplyRate((int)(factor * 100.0f), &finalFreq);
+ const float factor =
+ SuggestUtils::getDistanceScalingFactor(static_cast<float>(squaredDistance));
+ if (factor > 0.0f) {
+ multiplyRate(static_cast<int>(factor * 100.0f), &finalFreq);
} else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
}
@@ -908,10 +776,9 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
return finalFreq;
}
-/* static */
-int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
- const int *freqArray, const int *wordLengthArray, const int wordCount,
- const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
+/* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(const int *freqArray,
+ const int *wordLengthArray, const int wordCount, const Correction *correction,
+ const bool isSpaceProximity, const int *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false;
@@ -1040,9 +907,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
}
/* Damerau-Levenshtein distance */
-inline static int editDistanceInternal(
- int *editDistanceTable, const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength) {
+inline static int editDistanceInternal(int *editDistanceTable, const int *before,
+ const int beforeLength, const int *after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
int *dp = editDistanceTable;
const int li = beforeLength + 1;
@@ -1056,9 +922,9 @@ inline static int editDistanceInternal(
for (int i = 0; i < li - 1; ++i) {
for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = toBaseLowerCase(before[i]);
- const uint32_t co = toBaseLowerCase(after[j]);
- const uint16_t cost = (ci == co) ? 0 : 1;
+ const int ci = toBaseLowerCase(before[i]);
+ const int co = toBaseLowerCase(after[j]);
+ const int cost = (ci == co) ? 0 : 1;
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
@@ -1080,8 +946,8 @@ inline static int editDistanceInternal(
return dp[li * lo - 1];
}
-int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength) {
+/* static */ int Correction::RankingAlgorithm::editDistance(const int *before,
+ const int beforeLength, const int *after, const int afterLength) {
int table[(beforeLength + 1) * (afterLength + 1)];
return editDistanceInternal(table, before, beforeLength, after, afterLength);
}
@@ -1108,23 +974,21 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
// the result.
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
-/* static */
-float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength,
- const int score) {
+/* static */ float Correction::RankingAlgorithm::calcNormalizedScore(const int *before,
+ const int beforeLength, const int *after, const int afterLength, const int score) {
if (0 == beforeLength || 0 == afterLength) {
- return 0;
+ return 0.0f;
}
const int distance = editDistance(before, beforeLength, after, afterLength);
int spaceCount = 0;
for (int i = 0; i < afterLength; ++i) {
- if (after[i] == CODE_SPACE) {
+ if (after[i] == KEYCODE_SPACE) {
++spaceCount;
}
}
if (spaceCount == afterLength) {
- return 0;
+ return 0.0f;
}
const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index f016d5453..f0d62102f 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -17,9 +17,7 @@
#ifndef LATINIME_CORRECTION_H
#define LATINIME_CORRECTION_H
-#include <cassert>
#include <cstring> // for memset()
-#include <stdint.h>
#include "correction_state.h"
#include "defines.h"
@@ -56,18 +54,18 @@ class Correction {
// No need to initialize it explicitly here.
}
- virtual ~Correction() {}
+ // Non virtual inline destructor -- never inherit this class
+ ~Correction() {}
void resetCorrection();
- void initCorrection(
- const ProximityInfo *pi, const int inputSize, const int maxWordLength);
+ void initCorrection(const ProximityInfo *pi, const int inputSize, const int maxDepth);
void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
// TODO: remove
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
const bool doAutoCompletion, const int maxErrors);
- void checkState();
- bool sameAsTyped();
+ void checkState() const;
+ bool sameAsTyped() const;
bool initProcessState(const int index);
int getInputIndex() const;
@@ -78,14 +76,13 @@ class Correction {
return ++mTotalTraverseCount;
}
- int getFreqForSplitMultipleWords(
- const int *freqArray, const int *wordLengthArray, const int wordCount,
- const bool isSpaceProximity, const unsigned short *word);
- int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
- int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int *wordLength, const int inputSize);
+ int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
+ const int wordCount, const bool isSpaceProximity, const int *word) const;
+ int getFinalProbability(const int probability, int **word, int *wordLength);
+ int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
+ const int inputSize);
- CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
+ CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
/////////////////////////
// Tree helper methods
@@ -110,29 +107,28 @@ class Correction {
const int inputSize);
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const Correction *correction, const bool isSpaceProximity,
- const unsigned short *word);
- static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
- const unsigned short *after, const int afterLength, const int score);
- static int editDistance(const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength);
+ const int *word);
+ static float calcNormalizedScore(const int *before, const int beforeLength,
+ const int *after, const int afterLength, const int score);
+ static int editDistance(const int *before, const int beforeLength, const int *after,
+ const int afterLength);
private:
- static const int CODE_SPACE = ' ';
static const int MAX_INITIAL_SCORE = 255;
};
// proximity info state
- void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
+ void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
- mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
+ mProximityInfoState.initInputParams(0, static_cast<float>(MAX_VALUE_FOR_WEIGHTING),
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
}
- const unsigned short *getPrimaryInputWord() const {
+ const int *getPrimaryInputWord() const {
return mProximityInfoState.getPrimaryInputWord();
}
- unsigned short getPrimaryCharAt(const int index) const {
- return mProximityInfoState.getPrimaryCharAt(index);
+ int getPrimaryCodePointAt(const int index) const {
+ return mProximityInfoState.getPrimaryCodePointAt(index);
}
private:
@@ -147,13 +143,13 @@ class Correction {
}
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
- inline static void multiplyIntCapped(const int multiplier, int *base) {
+ AK_FORCE_INLINE static void multiplyIntCapped(const int multiplier, int *base) {
const int temp = *base;
if (temp != S_INT_MAX) {
// Branch if multiplier == 2 for the optimization
if (multiplier < 0) {
if (DEBUG_DICT) {
- assert(false);
+ ASSERT(false);
}
AKLOGI("--- Invalid multiplier: %d", multiplier);
} else if (multiplier == 0) {
@@ -170,18 +166,17 @@ class Correction {
}
}
- inline static int powerIntCapped(const int base, const int n) {
+ AK_FORCE_INLINE static int powerIntCapped(const int base, const int n) {
if (n <= 0) return 1;
if (base == 2) {
return n < 31 ? 1 << n : S_INT_MAX;
- } else {
- int ret = base;
- for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
- return ret;
}
+ int ret = base;
+ for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
+ return ret;
}
- inline static void multiplyRate(const int rate, int *freq) {
+ AK_FORCE_INLINE static void multiplyRate(const int rate, int *freq) {
if (*freq != S_INT_MAX) {
if (*freq > 1000000) {
*freq /= 100;
@@ -215,13 +210,13 @@ class Correction {
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline void startToTraverseAllNodes();
- inline bool isSingleQuote(const unsigned short c);
- inline CorrectionType processSkipChar(
- const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+ inline bool isSingleQuote(const int c);
+ inline CorrectionType processSkipChar(const int c, const bool isTerminal,
+ const bool inputIndexIncremented);
inline CorrectionType processUnrelatedCorrectionType();
- inline void addCharToCurrentWord(const int32_t c);
- inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
- int *wordLength, const int inputSize);
+ inline void addCharToCurrentWord(const int c);
+ inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
+ const int inputSize);
static const int TYPED_LETTER_MULTIPLIER = 2;
static const int FULL_WORD_MULTIPLIER = 2;
@@ -238,17 +233,17 @@ class Correction {
int mTerminalOutputIndex;
int mMaxErrors;
- uint8_t mTotalTraverseCount;
+ int mTotalTraverseCount;
// The following arrays are state buffer.
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
- int mDistances[MAX_WORD_LENGTH_INTERNAL];
+ int mWord[MAX_WORD_LENGTH];
+ int mDistances[MAX_WORD_LENGTH];
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
// Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
- int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)];
+ int mEditDistanceTable[(MAX_WORD_LENGTH + 1) * (MAX_WORD_LENGTH + 1)];
- CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
+ CorrectionState mCorrectionStates[MAX_WORD_LENGTH];
// The following member variables are being used as cache values of the correction state.
bool mNeedsToTraverseAllNodes;
@@ -275,5 +270,107 @@ class Correction {
bool mSkipping;
ProximityInfoState mProximityInfoState;
};
+
+inline void Correction::incrementInputIndex() { // Steps mInputIndex forward by one position.
+ ++mInputIndex;
+}
+
+AK_FORCE_INLINE void Correction::incrementOutputIndex() { // Opens and fills the next state slot.
+ ++mOutputIndex; // NOTE(review): no bounds check against the mCorrectionStates size here
+ mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
+ mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
+ mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
+ mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; // from here on: live member values
+ mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
+
+ mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
+ mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
+ mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
+ mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
+ mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
+
+ mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
+ mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
+ mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
+
+ mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
+
+ mCorrectionStates[mOutputIndex].mMatching = mMatching;
+ mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
+ mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
+ mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
+ mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
+ mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
+}
+
+inline void Correction::startToTraverseAllNodes() { // Raises the traverse-all-nodes flag.
+ mNeedsToTraverseAllNodes = true; // saved per slot by incrementOutputIndex()
+}
+
+inline bool Correction::isSingleQuote(const int c) { // True only for a quote the user did not type.
+ const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex);
+ return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE);
+}
+
+AK_FORCE_INLINE Correction::CorrectionType Correction::processSkipChar(const int c,
+ const bool isTerminal, const bool inputIndexIncremented) { // Appends c to the output word.
+ addCharToCurrentWord(c);
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); // undo caller's increment
+ mTerminalOutputIndex = mOutputIndex; // recorded before the output index moves on
+ incrementOutputIndex();
+ if (mNeedsToTraverseAllNodes && isTerminal) { // both conditions are required for ON_TERMINAL
+ return TRAVERSE_ALL_ON_TERMINAL;
+ }
+ return TRAVERSE_ALL_NOT_ON_TERMINAL;
+}
+
+inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex; // current indices are recorded unchanged
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED; // always UNRELATED; neither index is advanced here
+}
+
+AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
+ const int inputSize, const int *output, const int outputLength) { // Fills one table row.
+ // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH] is not touched.
+ // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
+ // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
+ // this calculates dp[outputLength][0] ... dp[outputLength][inputSize].
+ int *const current = editDistanceTable + outputLength * (inputSize + 1);
+ const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
+ const int *const prevprev = // null (0) when there is no row two steps back
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
+ current[0] = outputLength; // first column of the new row
+ const int co = toBaseLowerCase(output[outputLength - 1]); // requires outputLength >= 1
+ const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
+ for (int i = 1; i <= inputSize; ++i) {
+ const int ci = toBaseLowerCase(input[i - 1]);
+ const int cost = (ci == co) ? 0 : 1; // compared after toBaseLowerCase() on both sides
+ current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
+ if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
+ current[i] = min(current[i], prevprev[i - 2] + 1); // last two chars swapped: cost 1
+ }
+ }
+}
+
+AK_FORCE_INLINE void Correction::addCharToCurrentWord(const int c) { // Stores c, updates distances.
+ mWord[mOutputIndex] = c; // mOutputIndex itself is not advanced here
+ const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
+ calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord,
+ mOutputIndex + 1); // output length including the code point just stored
+}
+
+inline int Correction::getFinalProbabilityInternal(const int probability, int **word,
+ int *wordLength, const int inputSize) { // Outputs the word at the recorded terminal position.
+ const int outputIndex = mTerminalOutputIndex;
+ const int inputIndex = mTerminalInputIndex;
+ *wordLength = outputIndex + 1; // index of the last code point -> length
+ *word = mWord; // points at the internal buffer, not a copy
+ int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
+ inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
+ return finalProbability;
+}
+
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index e06ee42b0..0aedc287f 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2010, The Android Open Source Project
+ * Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,99 +17,91 @@
#ifndef LATINIME_DEFINES_H
#define LATINIME_DEFINES_H
-#include <stdint.h>
+#ifdef __GNUC__
+#define AK_FORCE_INLINE __attribute__((always_inline)) __inline__
+#else // __GNUC__
+#define AK_FORCE_INLINE inline
+#endif // __GNUC__
+
+#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+#undef AK_FORCE_INLINE
+#define AK_FORCE_INLINE inline
+#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
+
+// Must be equal to Constants.Dictionary.MAX_WORD_LENGTH in Java
+#define MAX_WORD_LENGTH 48
+// Must be equal to BinaryDictionary.MAX_RESULTS in Java
+#define MAX_RESULTS 18
+// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
+#define MAX_PROXIMITY_CHARS_SIZE 16
+#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h>
#ifndef LOG_TAG
#define LOG_TAG "LatinIME: "
-#endif
+#endif // LOG_TAG
#define AKLOGE(fmt, ...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__)
#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
-#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
- dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
+#define DUMP_RESULT(words, frequencies) do { dumpResult(words, frequencies); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
-#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
-// TODO: INTS_TO_CHARS
-#define SHORTS_TO_CHARS(input, length, output) do { \
- shortArrayToCharArray(input, length, output); } while (0)
-
-static inline void dumpWordInfo(const unsigned short *word, const int length,
- const int rank, const int frequency) {
- static char charBuf[50];
- int i = 0;
- for (; i < length; ++i) {
- const unsigned short c = word[i];
- if (c == 0) {
- break;
+#define INTS_TO_CHARS(input, length, output) do { \
+ intArrayToCharArray(input, length, output); } while (0)
+
+// UTF-8 encodes code points <= 0xFFFF into dest. TODO: Support full UTF-8 conversion
+AK_FORCE_INLINE static int intArrayToCharArray(const int *source, const int sourceSize,
+ char *dest) {
+ int si = 0;
+ int di = 0; // dest needs at least MAX_WORD_LENGTH + 2 bytes (see the loop bound below)
+ while (si < sourceSize && di < MAX_WORD_LENGTH - 1 && 0 != source[si]) {
+ const int codePoint = source[si++];
+ if (codePoint <= 0x7F) { // 1 byte; U+007F must not get an overlong 2-byte form
+ dest[di++] = codePoint;
+ } else if (codePoint <= 0x7FF) { // 2 bytes; U+07FF is the last 2-byte code point
+ dest[di++] = 0xC0 + (codePoint >> 6);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
+ } else if (codePoint <= 0xFFFF) { // 3 bytes; larger code points are still dropped
+ dest[di++] = 0xE0 + (codePoint >> 12);
+ dest[di++] = 0x80 + ((codePoint & 0xFC0) >> 6);
+ dest[di++] = 0x80 + (codePoint & 0x3F);
}
- // static_cast only for debugging
- charBuf[i] = static_cast<char>(c);
}
- charBuf[i] = 0;
- if (i > 1) {
+ dest[di] = 0;
+ return di; // number of bytes written, excluding the terminator
+}
+
+static inline void dumpWordInfo(const int *word, const int length, const int rank,
+ const int frequency) {
+ static char charBuf[50];
+ const int N = intArrayToCharArray(word, length, charBuf);
+ if (N > 1) {
AKLOGI("%2d [ %s ] (%d)", rank, charBuf, frequency);
}
}
-static inline void dumpResult(
- const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
- const int maxWordLength) {
+static inline void dumpResult(const int *outWords, const int *frequencies) {
AKLOGI("--- DUMP RESULT ---------");
- for (int i = 0; i < maxWordCounts; ++i) {
- dumpWordInfo(&outWords[i * maxWordLength], maxWordLength, i, frequencies[i]);
+ for (int i = 0; i < MAX_RESULTS; ++i) {
+ dumpWordInfo(&outWords[i * MAX_WORD_LENGTH], MAX_WORD_LENGTH, i, frequencies[i]);
}
AKLOGI("-------------------------");
}
-static inline void dumpWord(const unsigned short *word, const int length) {
+static AK_FORCE_INLINE void dumpWord(const int *word, const int length) {
static char charBuf[50];
- int i = 0;
- for (; i < length; ++i) {
- const unsigned short c = word[i];
- if (c == 0) {
- break;
- }
- // static_cast only for debugging
- charBuf[i] = static_cast<char>(c);
- }
- charBuf[i] = 0;
- if (i > 1) {
+ const int N = intArrayToCharArray(word, length, charBuf);
+ if (N > 1) {
AKLOGI("[ %s ]", charBuf);
}
}
-static inline void dumpWordInt(const int *word, const int length) {
- static char charBuf[50];
-
- for (int i = 0; i < length; ++i) {
- charBuf[i] = word[i];
- }
- charBuf[length] = 0;
- AKLOGI("i[ %s ]", charBuf);
-}
-
-// TODO: Change this to intArrayToCharArray
-static inline void shortArrayToCharArray(
- const unsigned short *input, const int length, char *output) {
- int i = 0;
- for (;i < length; ++i) {
- const unsigned short c = input[i];
- if (c == 0) {
- break;
- }
- // static_cast only for debugging
- output[i] = static_cast<char>(c);
- }
- output[i] = 0;
-}
-
#ifndef __ANDROID__
#include <cassert>
#include <execinfo.h>
#include <stdlib.h>
+#define DO_ASSERT_TEST
#define ASSERT(success) do { if (!(success)) { showStackTrace(); assert(success);} } while (0)
#define SHOW_STACK_TRACE do { showStackTrace(); } while (0)
@@ -126,23 +118,23 @@ static inline void showStackTrace() {
}
free(strs);
}
-#else
+#else // __ANDROID__
#include <cassert>
+#define DO_ASSERT_TEST
#define ASSERT(success) assert(success)
#define SHOW_STACK_TRACE
-#endif
+#endif // __ANDROID__
-#else
+#else // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...)
-#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
+#define DUMP_RESULT(words, frequencies)
#define DUMP_WORD(word, length)
-#define DUMP_WORD_INT(word, length)
+#undef DO_ASSERT_TEST
#define ASSERT(success)
#define SHOW_STACK_TRACE
-// TODO: INTS_TO_CHARS
-#define SHORTS_TO_CHARS(input, length, output)
-#endif
+#define INTS_TO_CHARS(input, length, output)
+#endif // defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#ifdef FLAG_DO_PROFILE
// Profiler
@@ -178,15 +170,15 @@ static inline void prof_out(void) {
}
AKLOGI("Total time is %6.3f ms.",
profile_buf[PROF_BUF_SIZE - 1] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC));
- float all = 0;
+ float all = 0.0f;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
all += profile_buf[i];
}
- if (all == 0) all = 1;
+ if (all < 1.0f) all = 1.0f;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
- if (profile_buf[i]) {
+ if (profile_buf[i] > 0.0f) {
AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
- i, (profile_buf[i] * 100 / all),
+ i, (profile_buf[i] * 100.0f / all),
profile_buf[i] * 1000.0f / static_cast<float>(CLOCKS_PER_SEC),
profile_counter[i]);
}
@@ -219,6 +211,11 @@ static inline void prof_out(void) {
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
+#define DEBUG_SAMPLING_POINTS false
+#define DEBUG_POINTS_PROBABILITY false
+#define DEBUG_DOUBLE_LETTER false
+#define DEBUG_CACHE false
+#define DEBUG_DUMP_ERROR false
#ifdef FLAG_FULL_DBG
#define DEBUG_GEO_FULL true
@@ -239,14 +236,16 @@ static inline void prof_out(void) {
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
+#define DEBUG_SAMPLING_POINTS false
+#define DEBUG_POINTS_PROBABILITY false
+#define DEBUG_DOUBLE_LETTER false
+#define DEBUG_CACHE false
+#define DEBUG_DUMP_ERROR false
#define DEBUG_GEO_FULL false
#endif // FLAG_DBG
-#ifndef U_SHORT_MAX
-#define U_SHORT_MAX 65535 // ((1 << 16) - 1)
-#endif
#ifndef S_INT_MAX
#define S_INT_MAX 2147483647 // ((1 << 31) - 1)
#endif
@@ -257,44 +256,38 @@ static inline void prof_out(void) {
#define S_INT_MIN (-2147483647 - 1) // -(1 << 31)
#endif
+#define M_PI_F 3.14159265f
+#define MAX_PERCENTILE 100
+
+// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
+// As such, this is the maximum number of characters that will be needed to represent an int as a
+// string, including the terminator; this is used as the size of a string buffer large enough to
+// hold any value that is intended to fit in an integer, e.g. in the code that reads the header
+// of the binary dictionary where a {key,value} string pair scheme is used.
+#define LARGEST_INT_DIGIT_COUNT 11
+
// Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap().
// We measured and compared performance of both, and found mmap() is fairly good in terms of
// loading time, and acceptable even for several initial lookups which involve page faults.
#define USE_MMAP_FOR_DICTIONARY
-// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
-#define ADDRESS_MASK 0x3FFFFF
-
-// The bit that decides if an address follows in the next 22 bits
-#define FLAG_ADDRESS_MASK 0x40
-// The bit that decides if this is a terminal node for a word. The node could still have children,
-// if the word has other endings.
-#define FLAG_TERMINAL_MASK 0x80
-
-#define FLAG_BIGRAM_READ 0x80
-#define FLAG_BIGRAM_CHILDEXIST 0x40
-#define FLAG_BIGRAM_CONTINUED 0x80
-#define FLAG_BIGRAM_FREQ 0x7F
-
-#define DICTIONARY_VERSION_MIN 200
#define NOT_VALID_WORD (-99)
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE (-1)
-#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
+#define MATCH_CHAR_WITHOUT_DISTANCE_INFO (-2)
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' '
+#define KEYCODE_SINGLE_QUOTE '\''
+#define KEYCODE_HYPHEN_MINUS '-'
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
-
-#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
-#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
-#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
#define SUGGEST_MULTIPLE_WORDS true
+#define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
@@ -316,22 +309,12 @@ static inline void prof_out(void) {
#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
#define TWO_WORDS_CORRECTION_DEMOTION_BASE 80
#define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1
-#define ZERO_DISTANCE_PROMOTION_RATE 110
+#define ZERO_DISTANCE_PROMOTION_RATE 110.0f
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
#define MAX_FREQ 255
#define MAX_BIGRAM_FREQ 15
-// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
-// This is only used for the size of array. Not to be used in c functions.
-#define MAX_WORD_LENGTH_INTERNAL 48
-
-// This must be the same as ProximityInfo#MAX_PROXIMITY_CHARS_SIZE, currently it's 16.
-#define MAX_PROXIMITY_CHARS_SIZE_INTERNAL 16
-
-// This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java
-#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
-
// Assuming locale strings such as en_US, sr-Latn etc.
#define MAX_LOCALE_STRING_LENGTH 10
@@ -350,19 +333,17 @@ static inline void prof_out(void) {
#define MULTIPLE_WORDS_DEMOTION_RATE 80
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6
-#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35
-#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185
+#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f
+#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f
/* heuristic... This should be changed if we change the unit of the frequency. */
#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100)
#define MAX_DEPTH_MULTIPLIER 3
-
#define FIRST_WORD_INDEX 0
-#define MAX_SPACES_INTERNAL 16
-
-// Max Distance between point to key
-#define MAX_POINT_TO_KEY_LENGTH 10000000
+// Max value for length, distance and probability which are used in weighting
+// TODO: Remove
+#define MAX_VALUE_FOR_WEIGHTING 10000000
// The max number of the keys in one keyboard layout
#define MAX_KEY_COUNT_IN_A_KEYBOARD 64
@@ -372,10 +353,10 @@ static inline void prof_out(void) {
#define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5
#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
-#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
// TODO: Remove
-#define MAX_POINTER_COUNT_FOR_G 2
+#define MAX_POINTER_COUNT 1
+#define MAX_POINTER_COUNT_G 2
// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
@@ -395,11 +376,10 @@ static inline void prof_out(void) {
#error "BIGRAM_FILTER_MODULO is larger than BIGRAM_FILTER_BYTE_SIZE"
#endif
-template<typename T> inline T min(T a, T b) { return a < b ? a : b; }
-template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
+template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
+template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
-// The ratio of neutral area radius to sweet spot radius.
-#define NEUTRAL_AREA_RADIUS_RATIO 1.3f
+#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
// DEBUG
#define INPUTLENGTH_FOR_DEBUG (-1)
@@ -416,12 +396,35 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
// Used as a return value for character comparison
typedef enum {
// Same char, possibly with different case or accent
- EQUIVALENT_CHAR,
+ MATCH_CHAR,
// It is a char located nearby on the keyboard
- NEAR_PROXIMITY_CHAR,
+ PROXIMITY_CHAR,
+ // Additional proximity char which can differ by language.
+ ADDITIONAL_PROXIMITY_CHAR,
+ // It is a substitution char
+ SUBSTITUTION_CHAR,
// It is an unrelated char
UNRELATED_CHAR,
- // Additional proximity char which can differ by language.
- ADDITIONAL_PROXIMITY_CHAR
} ProximityType;
+
+typedef enum {
+ NOT_A_DOUBLE_LETTER,
+ A_DOUBLE_LETTER,
+ A_STRONG_DOUBLE_LETTER
+} DoubleLetterLevel;
+
+typedef enum { // NOTE(review): not the nested Correction::CorrectionType (TRAVERSE_ALL_*, UNRELATED)
+ CT_MATCH,
+ CT_PROXIMITY,
+ CT_ADDITIONAL_PROXIMITY,
+ CT_SUBSTITUTION,
+ CT_OMISSION,
+ CT_INSERTION,
+ CT_TRANSPOSITION,
+ CT_SPACE_SUBSTITUTION,
+ CT_SPACE_OMISSION,
+ CT_COMPLETION,
+ CT_TERMINAL,
+ CT_NEW_WORD,
+} CorrectionType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h
index 292382487..1108a45c8 100644
--- a/native/jni/src/dic_traverse_wrapper.h
+++ b/native/jni/src/dic_traverse_wrapper.h
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2012, The Android Open Source Project
+ * Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,8 +17,6 @@
#ifndef LATINIME_DIC_TRAVERSE_WRAPPER_H
#define LATINIME_DIC_TRAVERSE_WRAPPER_H
-#include <stdint.h>
-
#include "defines.h"
#include "jni.h"
@@ -33,8 +31,8 @@ class DicTraverseWrapper {
}
return 0;
}
- static void initDicTraverseSession(void *traverseSession,
- const Dictionary *const dictionary, const int *prevWord, const int prevWordLength) {
+ static void initDicTraverseSession(void *traverseSession, const Dictionary *const dictionary,
+ const int *prevWord, const int prevWordLength) {
if (sDicTraverseSessionInitMethod) {
sDicTraverseSessionInitMethod(traverseSession, dictionary, prevWord, prevWordLength);
}
@@ -44,8 +42,7 @@ class DicTraverseWrapper {
sDicTraverseSessionReleaseMethod(traverseSession);
}
}
- static void setTraverseSessionFactoryMethod(
- void *(*factoryMethod)(JNIEnv *, jstring)) {
+ static void setTraverseSessionFactoryMethod(void *(*factoryMethod)(JNIEnv *, jstring)) {
sDicTraverseSessionFactoryMethod = factoryMethod;
}
static void setTraverseSessionInitMethod(
@@ -55,6 +52,7 @@ class DicTraverseWrapper {
static void setTraverseSessionReleaseMethod(void (*releaseMethod)(void *)) {
sDicTraverseSessionReleaseMethod = releaseMethod;
}
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseWrapper);
static void *(*sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring);
@@ -62,6 +60,5 @@ class DicTraverseWrapper {
void *, const Dictionary *const, const int *, const int);
static void (*sDicTraverseSessionReleaseMethod)(void *);
};
-int register_DicTraverseSession(JNIEnv *env);
} // namespace latinime
#endif // LATINIME_DIC_TRAVERSE_WRAPPER_H
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 2fbe83e86..2be1f4f39 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -23,79 +23,66 @@
#include "defines.h"
#include "dictionary.h"
#include "dic_traverse_wrapper.h"
-#include "gesture_decoder_wrapper.h"
+#include "gesture_suggest.h"
#include "unigram_dictionary.h"
namespace latinime {
-// TODO: Change the type of all keyCodes to uint32_t
-Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
- int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords,
- int maxPredictions)
+Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust)
: mDict(static_cast<unsigned char *>(dict)),
mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)),
mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
- mUnigramDictionary(new UnigramDictionary(mOffsetDict, typedLetterMultiplier,
- fullWordMultiplier, maxWordLength, maxWords, BinaryFormat::getFlags(mDict))),
- mBigramDictionary(new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions)),
- mGestureDecoder(new GestureDecoderWrapper(maxWordLength, maxWords)) {
- if (DEBUG_DICT) {
- if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
- AKLOGI("Max word length (%d) is greater than %d",
- maxWordLength, MAX_WORD_LENGTH_INTERNAL);
- AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
- }
- }
+ mUnigramDictionary(new UnigramDictionary(mOffsetDict, BinaryFormat::getFlags(mDict))),
+ mBigramDictionary(new BigramDictionary(mOffsetDict)),
+ mGestureSuggest(new GestureSuggest()) {
}
Dictionary::~Dictionary() {
delete mUnigramDictionary;
delete mBigramDictionary;
- delete mGestureDecoder;
+ delete mGestureSuggest;
}
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
- int *xcoordinates, int *ycoordinates, int *times, int *pointerIds,
- int *codes, int codesSize, int *prevWordChars,
- int prevWordLength, int commitPoint, bool isGesture,
- bool useFullEditDistance, unsigned short *outWords,
- int *frequencies, int *spaceIndices, int *outputTypes) const {
+ int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
+ int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, bool isGesture,
+ bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
+ int *outputTypes) const {
int result = 0;
if (isGesture) {
DicTraverseWrapper::initDicTraverseSession(
- traverseSession, this, prevWordChars, prevWordLength);
- result = mGestureDecoder->getSuggestions(proximityInfo, traverseSession,
- xcoordinates, ycoordinates, times, pointerIds, codes, codesSize, commitPoint,
- outWords, frequencies, spaceIndices, outputTypes);
+ traverseSession, this, prevWordCodePoints, prevWordLength);
+ result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
+ frequencies, spaceIndices, outputTypes);
if (DEBUG_DICT) {
- DUMP_RESULT(outWords, frequencies, 18 /* MAX_WORDS */, MAX_WORD_LENGTH_INTERNAL);
+ DUMP_RESULT(outWords, frequencies);
}
return result;
} else {
std::map<int, int> bigramMap;
uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
- mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
+ mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordCodePoints,
prevWordLength, &bigramMap, bigramFilter);
- result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates,
- ycoordinates, codes, codesSize, &bigramMap, bigramFilter,
- useFullEditDistance, outWords, frequencies, outputTypes);
+ result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates,
+ inputCodePoints, inputSize, &bigramMap, bigramFilter, useFullEditDistance, outWords,
+ frequencies, outputTypes);
return result;
}
}
-int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
- unsigned short *outWords, int *frequencies, int *outputTypes) const {
+int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize,
+ int *outWords, int *frequencies, int *outputTypes) const {
if (length <= 0) return 0;
- return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
- outputTypes);
+ return mBigramDictionary->getBigrams(word, length, inputCodePoints, inputSize, outWords,
+ frequencies, outputTypes);
}
-int Dictionary::getFrequency(const int32_t *word, int length) const {
+int Dictionary::getFrequency(const int *word, int length) const {
return mUnigramDictionary->getFrequency(word, length);
}
-bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
- int length2) const {
+bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const {
return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
}
} // namespace latinime
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index a1358890d..ecdddd771 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -24,37 +24,36 @@
namespace latinime {
class BigramDictionary;
-class IncrementalDecoderInterface;
class ProximityInfo;
+class SuggestInterface;
class UnigramDictionary;
class Dictionary {
public:
// Taken from SuggestedWords.java
- const static int KIND_TYPED = 0; // What user typed
- const static int KIND_CORRECTION = 1; // Simple correction/suggestion
- const static int KIND_COMPLETION = 2; // Completion (suggestion with appended chars)
- const static int KIND_WHITELIST = 3; // Whitelisted word
- const static int KIND_BLACKLIST = 4; // Blacklisted word
- const static int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. punctuation
- const static int KIND_APP_DEFINED = 6; // Suggested by the application
- const static int KIND_SHORTCUT = 7; // A shortcut
- const static int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
+ static const int KIND_TYPED = 0; // What user typed
+ static const int KIND_CORRECTION = 1; // Simple correction/suggestion
+ static const int KIND_COMPLETION = 2; // Completion (suggestion with appended chars)
+ static const int KIND_WHITELIST = 3; // Whitelisted word
+ static const int KIND_BLACKLIST = 4; // Blacklisted word
+ static const int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. punctuation
+ static const int KIND_APP_DEFINED = 6; // Suggested by the application
+ static const int KIND_SHORTCUT = 7; // A shortcut
+ static const int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
- Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
- int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions);
+ Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust);
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
- int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
- int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
- bool useFullEditDistance, unsigned short *outWords,
- int *frequencies, int *spaceIndices, int *outputTypes) const;
+ int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
+ int *prevWordCodePoints, int prevWordLength, int commitPoint, bool isGesture,
+ bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
+ int *outputTypes) const;
- int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
- unsigned short *outWords, int *frequencies, int *outputTypes) const;
+ int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
+ int *frequencies, int *outputTypes) const;
- int getFrequency(const int32_t *word, int length) const;
- bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
+ int getFrequency(const int *word, int length) const;
+ bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
const uint8_t *getDict() const { // required to release dictionary buffer
return mDict;
}
@@ -66,10 +65,6 @@ class Dictionary {
int getDictBufAdjust() const { return mDictBufAdjust; }
virtual ~Dictionary();
- // public static utility methods
- // static inline methods should be defined in the header file
- static int wideStrLen(unsigned short *str);
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
const uint8_t *mDict;
@@ -83,19 +78,7 @@ class Dictionary {
const UnigramDictionary *mUnigramDictionary;
const BigramDictionary *mBigramDictionary;
- IncrementalDecoderInterface *mGestureDecoder;
+ SuggestInterface *mGestureSuggest;
};
-
-// public static utility methods
-// static inline methods should be defined in the header file
-inline int Dictionary::wideStrLen(unsigned short *str) {
- if (!str) return 0;
- int length = 0;
- while (*str) {
- str++;
- length++;
- }
- return length;
-}
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h
index 31359e19d..4cbb127e8 100644
--- a/native/jni/src/geometry_utils.h
+++ b/native/jni/src/geometry_utils.h
@@ -19,38 +19,23 @@
#include <cmath>
-#define DEBUG_DECODER false
+#include "defines.h"
-#define M_PI_F 3.14159265f
#define ROUND_FLOAT_10000(f) ((f) < 1000.0f && (f) > 0.001f) \
? (floorf((f) * 10000.0f) / 10000.0f) : (f)
-#define SQUARE_FLOAT(x) ((x) * (x))
namespace latinime {
-static inline float getSquaredDistanceFloat(float x1, float y1, float x2, float y2) {
- const float deltaX = x1 - x2;
- const float deltaY = y1 - y2;
- return SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY);
-}
-
-static inline float getDistanceFloat(float x1, float y1, float x2, float y2) {
- return hypotf(x1 - x2, y1 - y2);
-}
-
-static inline int getDistanceInt(int x1, int y1, int x2, int y2) {
- return static_cast<int>(getDistanceFloat(static_cast<float>(x1), static_cast<float>(y1),
- static_cast<float>(x2), static_cast<float>(y2)));
-}
+static inline float SQUARE_FLOAT(const float x) { return x * x; }
-static inline float getAngle(int x1, int y1, int x2, int y2) {
+static AK_FORCE_INLINE float getAngle(const int x1, const int y1, const int x2, const int y2) {
const int dx = x1 - x2;
const int dy = y1 - y2;
- if (dx == 0 && dy == 0) return 0;
+ if (dx == 0 && dy == 0) return 0.0f;
return atan2f(static_cast<float>(dy), static_cast<float>(dx));
}
-static inline float getAngleDiff(float a1, float a2) {
+static AK_FORCE_INLINE float getAngleDiff(const float a1, const float a2) {
const float deltaA = fabsf(a1 - a2);
const float diff = ROUND_FLOAT_10000(deltaA);
if (diff > M_PI_F) {
@@ -60,30 +45,9 @@ static inline float getAngleDiff(float a1, float a2) {
return diff;
}
-static inline float pointToLineSegSquaredDistanceFloat(
- float x, float y, float x1, float y1, float x2, float y2, bool extend) {
- const float ray1x = x - x1;
- const float ray1y = y - y1;
- const float ray2x = x2 - x1;
- const float ray2y = y2 - y1;
-
- const float dotProduct = ray1x * ray2x + ray1y * ray2y;
- const float lineLengthSqr = SQUARE_FLOAT(ray2x) + SQUARE_FLOAT(ray2y);
- const float projectionLengthSqr = dotProduct / lineLengthSqr;
-
- float projectionX;
- float projectionY;
- if (!extend && projectionLengthSqr < 0.0f) {
- projectionX = x1;
- projectionY = y1;
- } else if (!extend && projectionLengthSqr > 1.0f) {
- projectionX = x2;
- projectionY = y2;
- } else {
- projectionX = x1 + projectionLengthSqr * ray2x;
- projectionY = y1 + projectionLengthSqr * ray2y;
- }
- return getSquaredDistanceFloat(x, y, projectionX, projectionY);
+static AK_FORCE_INLINE int getDistanceInt(const int x1, const int y1, const int x2,
+ const int y2) {
+ return static_cast<int>(hypotf(static_cast<float>(x1 - x2), static_cast<float>(y1 - y2)));
}
} // namespace latinime
#endif // LATINIME_GEOMETRY_UTILS_H
diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h
deleted file mode 100644
index 92e1ded49..000000000
--- a/native/jni/src/gesture/gesture_decoder_wrapper.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_GESTURE_DECODER_WRAPPER_H
-#define LATINIME_GESTURE_DECODER_WRAPPER_H
-
-#include <stdint.h>
-#include "defines.h"
-#include "incremental_decoder_interface.h"
-
-namespace latinime {
-
-class UnigramDictionary;
-class BigramDictionary;
-class ProximityInfo;
-
-class GestureDecoderWrapper : public IncrementalDecoderInterface {
- public:
- GestureDecoderWrapper(const int maxWordLength, const int maxWords)
- : mIncrementalDecoderInterface(getGestureDecoderInstance(maxWordLength, maxWords)) {
- }
-
- virtual ~GestureDecoderWrapper() {
- delete mIncrementalDecoderInterface;
- }
-
- int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
- int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
- unsigned short *outWords, int *frequencies, int *outputIndices,
- int *outputTypes) const {
- if (!mIncrementalDecoderInterface) {
- return 0;
- }
- return mIncrementalDecoderInterface->getSuggestions(
- pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
- inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
- }
-
- static void setGestureDecoderFactoryMethod(
- IncrementalDecoderInterface *(*factoryMethod)(int, int)) {
- sGestureDecoderFactoryMethod = factoryMethod;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(GestureDecoderWrapper);
- static IncrementalDecoderInterface *getGestureDecoderInstance(int maxWordLength, int maxWords) {
- if (sGestureDecoderFactoryMethod) {
- return sGestureDecoderFactoryMethod(maxWordLength, maxWords);
- }
- return 0;
- }
-
- static IncrementalDecoderInterface *(*sGestureDecoderFactoryMethod)(int, int);
- IncrementalDecoderInterface *mIncrementalDecoderInterface;
-};
-} // namespace latinime
-#endif // LATINIME_GESTURE_DECODER_WRAPPER_H
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h
deleted file mode 100644
index da7afdb8a..000000000
--- a/native/jni/src/gesture/incremental_decoder_wrapper.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_INCREMENTAL_DECODER_WRAPPER_H
-#define LATINIME_INCREMENTAL_DECODER_WRAPPER_H
-
-#include <stdint.h>
-#include "defines.h"
-#include "incremental_decoder_interface.h"
-
-namespace latinime {
-
-class UnigramDictionary;
-class BigramDictionary;
-class ProximityInfo;
-
-class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
- public:
- IncrementalDecoderWrapper(const int maxWordLength, const int maxWords)
- : mIncrementalDecoderInterface(getIncrementalDecoderInstance(maxWordLength, maxWords)) {
- }
-
- virtual ~IncrementalDecoderWrapper() {
- delete mIncrementalDecoderInterface;
- }
-
- int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
- int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
- unsigned short *outWords, int *frequencies, int *outputIndices,
- int *outputTypes) const {
- if (!mIncrementalDecoderInterface) {
- return 0;
- }
- return mIncrementalDecoderInterface->getSuggestions(
- pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
- inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
- }
-
- static void setIncrementalDecoderFactoryMethod(
- IncrementalDecoderInterface *(*factoryMethod)(int, int)) {
- sIncrementalDecoderFactoryMethod = factoryMethod;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(IncrementalDecoderWrapper);
- static IncrementalDecoderInterface *getIncrementalDecoderInstance(int maxWordLength,
- int maxWords) {
- if (sIncrementalDecoderFactoryMethod) {
- return sIncrementalDecoderFactoryMethod(maxWordLength, maxWords);
- }
- return 0;
- }
-
- static IncrementalDecoderInterface *(*sIncrementalDecoderFactoryMethod)(int, int);
- IncrementalDecoderInterface *mIncrementalDecoderInterface;
-};
-} // namespace latinime
-#endif // LATINIME_INCREMENTAL_DECODER_WRAPPER_H
diff --git a/native/jni/src/hash_map_compat.h b/native/jni/src/hash_map_compat.h
index 116359a73..a1e982bc4 100644
--- a/native/jni/src/hash_map_compat.h
+++ b/native/jni/src/hash_map_compat.h
@@ -1,11 +1,11 @@
/*
- * Copyright (C) 2012, The Android Open Source Project
+ * Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp
index fde93b5a9..74b5e0131 100644
--- a/native/jni/src/proximity_info.cpp
+++ b/native/jni/src/proximity_info.cpp
@@ -14,9 +14,8 @@
* limitations under the License.
*/
-#include <cassert>
-#include <cmath>
#include <cstring>
+#include <cmath>
#define LOG_TAG "LatinIME: proximity_info.cpp"
@@ -26,61 +25,67 @@
#include "geometry_utils.h"
#include "jni.h"
#include "proximity_info.h"
+#include "proximity_info_params.h"
namespace latinime {
-/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
-
-static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len,
- jint *buffer) {
+static AK_FORCE_INLINE void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray,
+ jsize len, jint *buffer) {
if (jArray && buffer) {
env->GetIntArrayRegion(jArray, 0, len, buffer);
} else if (buffer) {
- memset(buffer, 0, len * sizeof(jint));
+ memset(buffer, 0, len * sizeof(buffer[0]));
}
}
-static inline void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray, jsize len,
- jfloat *buffer) {
+static AK_FORCE_INLINE void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray,
+ jsize len, jfloat *buffer) {
if (jArray && buffer) {
env->GetFloatArrayRegion(jArray, 0, len, buffer);
} else if (buffer) {
- memset(buffer, 0, len * sizeof(jfloat));
+ memset(buffer, 0, len * sizeof(buffer[0]));
}
}
-ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int maxProximityCharsSize,
+ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
const int keyboardWidth, const int keyboardHeight, const int gridWidth,
const int gridHeight, const int mostCommonKeyWidth, const jintArray proximityChars,
const int keyCount, const jintArray keyXCoordinates, const jintArray keyYCoordinates,
const jintArray keyWidths, const jintArray keyHeights, const jintArray keyCharCodes,
const jfloatArray sweetSpotCenterXs, const jfloatArray sweetSpotCenterYs,
const jfloatArray sweetSpotRadii)
- : MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize), GRID_WIDTH(gridWidth),
- GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth),
+ : GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight), MOST_COMMON_KEY_WIDTH(mostCommonKeyWidth),
MOST_COMMON_KEY_WIDTH_SQUARE(mostCommonKeyWidth * mostCommonKeyWidth),
CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth),
CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight),
KEY_COUNT(min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)),
KEYBOARD_WIDTH(keyboardWidth), KEYBOARD_HEIGHT(keyboardHeight),
+ KEYBOARD_HYPOTENUSE(hypotf(KEYBOARD_WIDTH, KEYBOARD_HEIGHT)),
HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
&& keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
&& sweetSpotCenterYs && sweetSpotRadii),
- mProximityCharsArray(new int32_t[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
- /* proximityGridLength */]),
+ mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
+ /* proximityCharsLength */]),
mCodeToKeyMap() {
- const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
+ /* Let's check the input array length here to make sure */
+ const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
+ if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
+ AKLOGE("Invalid proximityCharsLength: %d", proximityCharsLength);
+ ASSERT(false);
+ return;
+ }
if (DEBUG_PROXIMITY_INFO) {
- AKLOGI("Create proximity info array %d", proximityGridLength);
+ AKLOGI("Create proximity info array %d", proximityCharsLength);
}
const jsize localeCStrUtf8Length = env->GetStringUTFLength(localeJStr);
if (localeCStrUtf8Length >= MAX_LOCALE_STRING_LENGTH) {
AKLOGI("Locale string length too long: length=%d", localeCStrUtf8Length);
- assert(false);
+ ASSERT(false);
}
memset(mLocaleStr, 0, sizeof(mLocaleStr));
env->GetStringUTFRegion(localeJStr, 0, env->GetStringLength(localeJStr), mLocaleStr);
- safeGetOrFillZeroIntArrayRegion(env, proximityChars, proximityGridLength, mProximityCharsArray);
+ safeGetOrFillZeroIntArrayRegion(env, proximityChars, proximityCharsLength,
+ mProximityCharsArray);
safeGetOrFillZeroIntArrayRegion(env, keyXCoordinates, KEY_COUNT, mKeyXCoordinates);
safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths);
@@ -96,26 +101,22 @@ ProximityInfo::~ProximityInfo() {
delete[] mProximityCharsArray;
}
-inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) const {
- return ((y / CELL_HEIGHT) * GRID_WIDTH + (x / CELL_WIDTH))
- * MAX_PROXIMITY_CHARS_SIZE;
-}
-
bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) {
if (DEBUG_DICT) {
AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
// TODO: Enable this assertion.
- //assert(false);
+ //ASSERT(false);
}
return false;
}
- const int startIndex = getStartIndexFromCoordinates(x, y);
+ const int startIndex = ProximityInfoUtils::getStartIndexFromCoordinates(x, y,
+ CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH);
if (DEBUG_PROXIMITY_INFO) {
AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
}
- int32_t *proximityCharsArray = mProximityCharsArray;
+ int *proximityCharsArray = mProximityCharsArray;
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
AKLOGI("Index: %d", mProximityCharsArray[startIndex + i]);
@@ -127,124 +128,25 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
return false;
}
-static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float x2, float y2,
- float scale) {
- const float deltaX = x1 - x2;
- const float deltaY = y1 - y2;
- return (SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY)) / SQUARE_FLOAT(scale);
-}
-
float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y) const {
- const static float verticalSweetSpotScaleForGeometric = 1.1f;
const bool correctTouchPosition = hasTouchPositionCorrectionData();
- const float centerX = static_cast<float>(correctTouchPosition
- ? getSweetSpotCenterXAt(keyId)
+ const float centerX = static_cast<float>(correctTouchPosition ? getSweetSpotCenterXAt(keyId)
: getKeyCenterXOfKeyIdG(keyId));
const float visualKeyCenterY = static_cast<float>(getKeyCenterYOfKeyIdG(keyId));
float centerY;
if (correctTouchPosition) {
const float sweetSpotCenterY = static_cast<float>(getSweetSpotCenterYAt(keyId));
const float gapY = sweetSpotCenterY - visualKeyCenterY;
- centerY = visualKeyCenterY + gapY * verticalSweetSpotScaleForGeometric;
+ centerY = visualKeyCenterY + gapY * ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE_G;
} else {
centerY = visualKeyCenterY;
}
const float touchX = static_cast<float>(x);
const float touchY = static_cast<float>(y);
const float keyWidth = static_cast<float>(getMostCommonKeyWidth());
- return getNormalizedSquaredDistanceFloat(centerX, centerY, touchX, touchY, keyWidth);
-}
-
-int ProximityInfo::squaredDistanceToEdge(const int keyId, const int x, const int y) const {
- if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case
- const int left = mKeyXCoordinates[keyId];
- const int top = mKeyYCoordinates[keyId];
- const int right = left + mKeyWidths[keyId];
- const int bottom = top + mKeyHeights[keyId];
- const int edgeX = x < left ? left : (x > right ? right : x);
- const int edgeY = y < top ? top : (y > bottom ? bottom : y);
- const int dx = x - edgeX;
- const int dy = y - edgeY;
- return dx * dx + dy * dy;
-}
-
-void ProximityInfo::calculateNearbyKeyCodes(
- const int x, const int y, const int32_t primaryKey, int *inputCodes) const {
- int32_t *proximityCharsArray = mProximityCharsArray;
- int insertPos = 0;
- inputCodes[insertPos++] = primaryKey;
- const int startIndex = getStartIndexFromCoordinates(x, y);
- if (startIndex >= 0) {
- for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
- const int32_t c = proximityCharsArray[startIndex + i];
- if (c < KEYCODE_SPACE || c == primaryKey) {
- continue;
- }
- const int keyIndex = getKeyIndexOf(c);
- const bool onKey = isOnKey(keyIndex, x, y);
- const int distance = squaredDistanceToEdge(keyIndex, x, y);
- if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) {
- inputCodes[insertPos++] = c;
- if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
- if (DEBUG_DICT) {
- assert(false);
- }
- return;
- }
- }
- }
- const int additionalProximitySize =
- AdditionalProximityChars::getAdditionalCharsSize(mLocaleStr, primaryKey);
- if (additionalProximitySize > 0) {
- inputCodes[insertPos++] = ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE;
- if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
- if (DEBUG_DICT) {
- assert(false);
- }
- return;
- }
-
- const int32_t *additionalProximityChars =
- AdditionalProximityChars::getAdditionalChars(mLocaleStr, primaryKey);
- for (int j = 0; j < additionalProximitySize; ++j) {
- const int32_t ac = additionalProximityChars[j];
- int k = 0;
- for (; k < insertPos; ++k) {
- if (static_cast<int>(ac) == inputCodes[k]) {
- break;
- }
- }
- if (k < insertPos) {
- continue;
- }
- inputCodes[insertPos++] = ac;
- if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
- if (DEBUG_DICT) {
- assert(false);
- }
- return;
- }
- }
- }
- }
- // Add a delimiter for the proximity characters
- for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
- inputCodes[i] = NOT_A_CODE_POINT;
- }
-}
-
-int ProximityInfo::getKeyIndexOf(const int c) const {
- if (KEY_COUNT == 0) {
- // We do not have the coordinate data
- return NOT_AN_INDEX;
- }
- const int lowerCode = static_cast<int>(toLowerCase(c));
- hash_map_compat<int, int>::const_iterator mapPos = mCodeToKeyMap.find(lowerCode);
- if (mapPos != mCodeToKeyMap.end()) {
- return mapPos->second;
- }
- return NOT_AN_INDEX;
+ return ProximityInfoUtils::getSquaredDistanceFloat(centerX, centerY, touchX, touchY)
+ / SQUARE_FLOAT(keyWidth);
}
int ProximityInfo::getCodePointOf(const int keyIndex) const {
@@ -258,7 +160,7 @@ void ProximityInfo::initializeG() {
// TODO: Optimize
for (int i = 0; i < KEY_COUNT; ++i) {
const int code = mKeyCodePoints[i];
- const int lowerCode = static_cast<int>(toLowerCase(code));
+ const int lowerCode = toLowerCase(code);
mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
mCodeToKeyMap[lowerCode] = i;
@@ -275,11 +177,13 @@ void ProximityInfo::initializeG() {
}
int ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const {
- return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode));
+ return getKeyCenterXOfKeyIdG(
+ ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, charCode, &mCodeToKeyMap));
}
int ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const {
- return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode));
+ return getKeyCenterYOfKeyIdG(
+ ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, charCode, &mCodeToKeyMap));
}
int ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const {
@@ -296,12 +200,10 @@ int ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const {
return 0;
}
-int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const {
- const int keyId0 = getKeyIndexOf(key0);
- const int keyId1 = getKeyIndexOf(key1);
+int ProximityInfo::getKeyKeyDistanceG(const int keyId0, const int keyId1) const {
if (keyId0 >= 0 && keyId1 >= 0) {
return mKeyKeyDistancesG[keyId0][keyId1];
}
- return MAX_POINT_TO_KEY_LENGTH;
+ return MAX_VALUE_FOR_WEIGHTING;
}
} // namespace latinime
diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h
index 70942aa19..57a175d2c 100644
--- a/native/jni/src/proximity_info.h
+++ b/native/jni/src/proximity_info.h
@@ -17,11 +17,10 @@
#ifndef LATINIME_PROXIMITY_INFO_H
#define LATINIME_PROXIMITY_INFO_H
-#include <stdint.h>
-
#include "defines.h"
#include "hash_map_compat.h"
#include "jni.h"
+#include "proximity_info_utils.h"
namespace latinime {
@@ -29,7 +28,7 @@ class Correction;
class ProximityInfo {
public:
- ProximityInfo(JNIEnv *env, const jstring localeJStr, const int maxProximityCharsSize,
+ ProximityInfo(JNIEnv *env, const jstring localeJStr,
const int keyboardWidth, const int keyboardHeight, const int gridWidth,
const int gridHeight, const int mostCommonKeyWidth, const jintArray proximityChars,
const int keyCount, const jintArray keyXCoordinates, const jintArray keyYCoordinates,
@@ -42,94 +41,59 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y) const;
bool sameAsTyped(const unsigned short *word, int length) const;
- int getKeyIndexOf(const int c) const;
int getCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
return mSweetSpotRadii[keyIndex] > 0.0f;
}
- float getSweetSpotRadiiAt(int keyIndex) const {
- return mSweetSpotRadii[keyIndex];
- }
- float getSweetSpotCenterXAt(int keyIndex) const {
- return mSweetSpotCenterXs[keyIndex];
- }
- float getSweetSpotCenterYAt(int keyIndex) const {
- return mSweetSpotCenterYs[keyIndex];
- }
+ float getSweetSpotRadiiAt(int keyIndex) const { return mSweetSpotRadii[keyIndex]; }
+ float getSweetSpotCenterXAt(int keyIndex) const { return mSweetSpotCenterXs[keyIndex]; }
+ float getSweetSpotCenterYAt(int keyIndex) const { return mSweetSpotCenterYs[keyIndex]; }
void calculateNearbyKeyCodes(
- const int x, const int y, const int32_t primaryKey, int *inputCodes) const;
-
- bool hasTouchPositionCorrectionData() const {
- return HAS_TOUCH_POSITION_CORRECTION_DATA;
- }
-
- int getMostCommonKeyWidth() const {
- return MOST_COMMON_KEY_WIDTH;
- }
+ const int x, const int y, const int primaryKey, int *inputCodes) const;
+ bool hasTouchPositionCorrectionData() const { return HAS_TOUCH_POSITION_CORRECTION_DATA; }
+ int getMostCommonKeyWidth() const { return MOST_COMMON_KEY_WIDTH; }
+ int getMostCommonKeyWidthSquare() const { return MOST_COMMON_KEY_WIDTH_SQUARE; }
+ int getKeyCount() const { return KEY_COUNT; }
+ int getCellHeight() const { return CELL_HEIGHT; }
+ int getCellWidth() const { return CELL_WIDTH; }
+ int getGridWidth() const { return GRID_WIDTH; }
+ int getGridHeight() const { return GRID_HEIGHT; }
+ int getKeyboardWidth() const { return KEYBOARD_WIDTH; }
+ int getKeyboardHeight() const { return KEYBOARD_HEIGHT; }
+ float getKeyboardHypotenuse() const { return KEYBOARD_HYPOTENUSE; }
- int getMostCommonKeyWidthSquare() const {
- return MOST_COMMON_KEY_WIDTH_SQUARE;
- }
-
- const char *getLocaleStr() const {
- return mLocaleStr;
- }
-
- int getKeyCount() const {
- return KEY_COUNT;
- }
-
- int getCellHeight() const {
- return CELL_HEIGHT;
- }
-
- int getCellWidth() const {
- return CELL_WIDTH;
- }
-
- int getGridWidth() const {
- return GRID_WIDTH;
- }
+ int getKeyCenterXOfCodePointG(int charCode) const;
+ int getKeyCenterYOfCodePointG(int charCode) const;
+ int getKeyCenterXOfKeyIdG(int keyId) const;
+ int getKeyCenterYOfKeyIdG(int keyId) const;
+ int getKeyKeyDistanceG(int keyId0, int keyId1) const;
- int getGridHeight() const {
- return GRID_HEIGHT;
+ AK_FORCE_INLINE void initializeProximities(const int *const inputCodes,
+ const int *const inputXCoordinates, const int *const inputYCoordinates,
+ const int inputSize, int *allInputCodes) const {
+ ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
+ inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
+ mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
+ KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
}
- int getKeyboardWidth() const {
- return KEYBOARD_WIDTH;
+ AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
}
- int getKeyboardHeight() const {
- return KEYBOARD_HEIGHT;
+ AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
+ return getKeyIndexOf(codePoint) != NOT_AN_INDEX;
}
- int getKeyCenterXOfCodePointG(int charCode) const;
- int getKeyCenterYOfCodePointG(int charCode) const;
- int getKeyCenterXOfKeyIdG(int keyId) const;
- int getKeyCenterYOfKeyIdG(int keyId) const;
- int getKeyKeyDistanceG(int key0, int key1) const;
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
- static const float NOT_A_DISTANCE_FLOAT;
- int getStartIndexFromCoordinates(const int x, const int y) const;
void initializeG();
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
bool hasInputCoordinates() const;
- int squaredDistanceToEdge(const int keyId, const int x, const int y) const;
- bool isOnKey(const int keyId, const int x, const int y) const {
- if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case
- const int left = mKeyXCoordinates[keyId];
- const int top = mKeyYCoordinates[keyId];
- const int right = left + mKeyWidths[keyId] + 1;
- const int bottom = top + mKeyHeights[keyId];
- return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom;
- }
- const int MAX_PROXIMITY_CHARS_SIZE;
const int GRID_WIDTH;
const int GRID_HEIGHT;
const int MOST_COMMON_KEY_WIDTH;
@@ -139,14 +103,15 @@ class ProximityInfo {
const int KEY_COUNT;
const int KEYBOARD_WIDTH;
const int KEYBOARD_HEIGHT;
+ const float KEYBOARD_HYPOTENUSE;
const bool HAS_TOUCH_POSITION_CORRECTION_DATA;
char mLocaleStr[MAX_LOCALE_STRING_LENGTH];
- int32_t *mProximityCharsArray;
- int32_t mKeyXCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int32_t mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int *mProximityCharsArray;
+ int mKeyXCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
diff --git a/native/jni/src/proximity_info_params.cpp b/native/jni/src/proximity_info_params.cpp
new file mode 100644
index 000000000..f9a4352ee
--- /dev/null
+++ b/native/jni/src/proximity_info_params.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "defines.h"
+#include "proximity_info_params.h"
+
+namespace latinime { // out-of-class definitions of the constants declared in proximity_info_params.h
+const float ProximityInfoParams::NOT_A_DISTANCE_FLOAT = -1.0f; // negative sentinel: not a distance
+const int ProximityInfoParams::MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE = 5;
+const float ProximityInfoParams::VERTICAL_SWEET_SPOT_SCALE_G = 1.1f;
+
+/* Per-method constants, grouped by the ProximityInfoStateUtils function named above each group */
+// Used by ProximityInfoStateUtils::initGeometricDistanceInfos()
+const float ProximityInfoParams::NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f;
+
+// Used by ProximityInfoStateUtils::updateNearKeysDistances()
+const float ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_DISTANCE = 2.0f;
+
+// Used by ProximityInfoStateUtils::isPrevLocalMin()
+const float ProximityInfoParams::MARGIN_FOR_PREV_LOCAL_MIN = 0.01f;
+
+// Used by ProximityInfoStateUtils::getPointScore()
+const int ProximityInfoParams::DISTANCE_BASE_SCALE = 100;
+const float ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_POINT_SCORE = 0.6f;
+const int ProximityInfoParams::CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 25;
+const float ProximityInfoParams::NOT_LOCALMIN_DISTANCE_SCORE = -1.0f;
+const float ProximityInfoParams::LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE = 1.0f;
+const float ProximityInfoParams::CORNER_ANGLE_THRESHOLD_FOR_POINT_SCORE = M_PI_F * 2.0f / 3.0f;
+const float ProximityInfoParams::CORNER_SUM_ANGLE_THRESHOLD = M_PI_F / 4.0f;
+const float ProximityInfoParams::CORNER_SCORE = 1.0f;
+
+// Used by ProximityInfoStateUtils::refreshSpeedRates()
+const int ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION = 2;
+
+// Used by ProximityInfoStateUtils::pushTouchPoint()
+const int ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE = 4;
+
+// Used by ProximityInfoStateUtils::updateAlignPointProbabilities()
+const float ProximityInfoParams::MIN_PROBABILITY = 0.000001f;
+const float ProximityInfoParams::MAX_SKIP_PROBABILITY = 0.95f;
+const float ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY = 0.01f;
+const float ProximityInfoParams::SKIP_LAST_POINT_PROBABILITY = 0.1f;
+const float ProximityInfoParams::MIN_SPEED_RATE_FOR_SKIP_PROBABILITY = 0.15f;
+const float ProximityInfoParams::SPEED_WEIGHT_FOR_SKIP_PROBABILITY = 0.9f;
+const float ProximityInfoParams::SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY = 0.6f;
+const float ProximityInfoParams::NEAREST_DISTANCE_WEIGHT = 0.5f;
+const float ProximityInfoParams::NEAREST_DISTANCE_BIAS = 0.5f;
+const float ProximityInfoParams::NEAREST_DISTANCE_WEIGHT_FOR_LAST = 0.6f;
+const float ProximityInfoParams::NEAREST_DISTANCE_BIAS_FOR_LAST = 0.4f;
+const float ProximityInfoParams::ANGLE_WEIGHT = 0.90f;
+const float ProximityInfoParams::DEEP_CORNER_ANGLE_THRESHOLD = M_PI_F * 60.0f / 180.0f;
+const float ProximityInfoParams::SKIP_DEEP_CORNER_PROBABILITY = 0.1f;
+const float ProximityInfoParams::CORNER_ANGLE_THRESHOLD = M_PI_F * 30.0f / 180.0f;
+const float ProximityInfoParams::STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180.0f;
+const float ProximityInfoParams::SKIP_CORNER_PROBABILITY = 0.4f;
+const float ProximityInfoParams::SPEED_MARGIN = 0.1f;
+const float ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f;
+// TODO: The variance is critical for accuracy; thus, tuning these parameters by machine
+// learning or a similar method would be worthwhile.
+const float ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION = 0.3f;
+const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION = 0.25f;
+const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION = 0.5f;
+const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION = 0.15f;
+const float ProximityInfoParams::MIN_STANDERD_DIVIATION = 0.37f; // sic: name as in the header
+const float ProximityInfoParams::PREV_DISTANCE_WEIGHT = 0.5f;
+const float ProximityInfoParams::NEXT_DISTANCE_WEIGHT = 0.6f;
+
+// Used by ProximityInfoStateUtils::suppressCharProbabilities()
+const float ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT = 1.5f;
+const float ProximityInfoParams::MIN_SUPPRESSION_RATE = 0.1f;
+const float ProximityInfoParams::SUPPRESSION_WEIGHT = 0.5f;
+const float ProximityInfoParams::SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN = 0.1f;
+const float ProximityInfoParams::SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN = 0.3f; // sic
+
+// Used by ProximityInfoStateUtils::getMostProbableString()
+const float ProximityInfoParams::DEMOTION_LOG_PROBABILITY = 0.3f;
+
+// Used by ProximityInfoStateUtils::updateSampledSearchKeySets()
+// TODO: Investigate whether this is required.
+const float ProximityInfoParams::SEARCH_KEY_RADIUS_RATIO = 0.95f;
+
+// Used by ProximityInfoStateUtils::calculateBeelineSpeedRate()
+const int ProximityInfoParams::LOOKUP_RADIUS_PERCENTILE = 50;
+const int ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS = 150;
+const int ProximityInfoParams::STRONG_DOUBLE_LETTER_TIME_MILLIS = 600;
+
+// Used by ProximityInfoStateUtils::calculateNormalizedSquaredDistance()
+const int ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = 1 << 10; // 1024
+
+} // namespace latinime
diff --git a/native/jni/src/proximity_info_params.h b/native/jni/src/proximity_info_params.h
new file mode 100644
index 000000000..e7aec0976
--- /dev/null
+++ b/native/jni/src/proximity_info_params.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_PARAMS_H
+#define LATINIME_PROXIMITY_INFO_PARAMS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class ProximityInfoParams { // static constants only; values live in proximity_info_params.cpp
+ public:
+ static const float NOT_A_DISTANCE_FLOAT;
+ static const int MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE;
+ static const float VERTICAL_SWEET_SPOT_SCALE_G;
+
+ // Used by ProximityInfoStateUtils::initGeometricDistanceInfos()
+ static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD;
+
+ // Used by ProximityInfoStateUtils::updateNearKeysDistances()
+ static const float NEAR_KEY_THRESHOLD_FOR_DISTANCE;
+
+ // Used by ProximityInfoStateUtils::isPrevLocalMin()
+ static const float MARGIN_FOR_PREV_LOCAL_MIN;
+
+ // Used by ProximityInfoStateUtils::getPointScore()
+ static const int DISTANCE_BASE_SCALE;
+ static const float NEAR_KEY_THRESHOLD_FOR_POINT_SCORE;
+ static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE;
+ static const float NOT_LOCALMIN_DISTANCE_SCORE;
+ static const float LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE;
+ static const float CORNER_ANGLE_THRESHOLD_FOR_POINT_SCORE;
+ static const float CORNER_SUM_ANGLE_THRESHOLD;
+ static const float CORNER_SCORE;
+
+ // Used by ProximityInfoStateUtils::refreshSpeedRates()
+ static const int NUM_POINTS_FOR_SPEED_CALCULATION;
+
+ // Used by ProximityInfoStateUtils::pushTouchPoint()
+ static const int LAST_POINT_SKIP_DISTANCE_SCALE;
+
+ // Used by ProximityInfoStateUtils::updateAlignPointProbabilities()
+ static const float MIN_PROBABILITY;
+ static const float MAX_SKIP_PROBABILITY;
+ static const float SKIP_FIRST_POINT_PROBABILITY;
+ static const float SKIP_LAST_POINT_PROBABILITY;
+ static const float MIN_SPEED_RATE_FOR_SKIP_PROBABILITY;
+ static const float SPEED_WEIGHT_FOR_SKIP_PROBABILITY;
+ static const float SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY;
+ static const float NEAREST_DISTANCE_WEIGHT;
+ static const float NEAREST_DISTANCE_BIAS;
+ static const float NEAREST_DISTANCE_WEIGHT_FOR_LAST;
+ static const float NEAREST_DISTANCE_BIAS_FOR_LAST;
+ static const float ANGLE_WEIGHT;
+ static const float DEEP_CORNER_ANGLE_THRESHOLD;
+ static const float SKIP_DEEP_CORNER_PROBABILITY;
+ static const float CORNER_ANGLE_THRESHOLD;
+ static const float STRAIGHT_ANGLE_THRESHOLD;
+ static const float SKIP_CORNER_PROBABILITY;
+ static const float SPEED_MARGIN;
+ static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION;
+ static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION;
+ static const float MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION;
+ static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION;
+ static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION;
+ static const float MIN_STANDERD_DIVIATION; // sic: spelling is part of the name the .cpp defines
+ static const float PREV_DISTANCE_WEIGHT;
+ static const float NEXT_DISTANCE_WEIGHT;
+
+ // Used by ProximityInfoStateUtils::suppressCharProbabilities()
+ static const float SUPPRESSION_LENGTH_WEIGHT;
+ static const float MIN_SUPPRESSION_RATE;
+ static const float SUPPRESSION_WEIGHT;
+ static const float SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN;
+ static const float SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN; // sic
+
+ // Used by ProximityInfoStateUtils::getMostProbableString()
+ static const float DEMOTION_LOG_PROBABILITY;
+
+ // Used by ProximityInfoStateUtils::updateSampledSearchKeySets()
+ static const float SEARCH_KEY_RADIUS_RATIO;
+
+ // Used by ProximityInfoStateUtils::calculateBeelineSpeedRate()
+ static const int LOOKUP_RADIUS_PERCENTILE;
+ static const int FIRST_POINT_TIME_OFFSET_MILLIS;
+ static const int STRONG_DOUBLE_LETTER_TIME_MILLIS;
+
+ // Used by ProximityInfoStateUtils::calculateNormalizedSquaredDistance()
+ static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoParams); // presumably blocks instantiation; macro not shown here
+};
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_PARAMS_H
diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp
index 392ec8194..fe1c43320 100644
--- a/native/jni/src/proximity_info_state.cpp
+++ b/native/jni/src/proximity_info_state.cpp
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#include <cstring> // for memset()
-#include <stdint.h>
+#include <cstring> // for memset() and memcpy()
+#include <sstream> // for debug prints
+#include <vector>
#define LOG_TAG "LatinIME: proximity_info_state.cpp"
@@ -23,64 +24,36 @@
#include "geometry_utils.h"
#include "proximity_info.h"
#include "proximity_info_state.h"
+#include "proximity_info_state_utils.h"
namespace latinime {
-const int ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
-const int ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
- 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
-const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
-const int ProximityInfoState::NOT_A_CODE = -1;
-
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
- const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize,
+ const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
const int *const pointerIds, const bool isGeometric) {
-
- if (isGeometric) {
- mIsContinuationPossible = checkAndReturnIsContinuationPossible(
- inputSize, xCoordinates, yCoordinates, times);
- } else {
- mIsContinuationPossible = false;
+ ASSERT(isGeometric || (inputSize < MAX_WORD_LENGTH));
+ mIsContinuationPossible = ProximityInfoStateUtils::checkAndReturnIsContinuationPossible(
+ inputSize, xCoordinates, yCoordinates, times, mSampledInputSize, &mSampledInputXs,
+ &mSampledInputYs, &mSampledTimes, &mSampledInputIndice);
+ if (DEBUG_DICT) {
+ AKLOGI("isContinuationPossible = %s", (mIsContinuationPossible ? "true" : "false"));
}
mProximityInfo = proximityInfo; // cache the keyboard description and a few of its properties
mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData();
mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare();
- mLocaleStr = proximityInfo->getLocaleStr();
mKeyCount = proximityInfo->getKeyCount();
mCellHeight = proximityInfo->getCellHeight();
mCellWidth = proximityInfo->getCellWidth();
mGridHeight = proximityInfo->getGridWidth(); // NOTE(review): height read from getGridWidth() - looks swapped; confirm
mGridWidth = proximityInfo->getGridHeight(); // NOTE(review): width read from getGridHeight() - looks swapped; confirm
- memset(mInputCodes, 0, sizeof(mInputCodes));
+ memset(mInputProximities, 0, sizeof(mInputProximities));
if (!isGeometric && pointerId == 0) {
- // Initialize
- // - mInputCodes
- // - mNormalizedSquaredDistances
- // TODO: Merge
- for (int i = 0; i < inputSize; ++i) {
- const int32_t primaryKey = inputCodes[i];
- const int x = xCoordinates[i];
- const int y = yCoordinates[i];
- int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
- mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities);
- }
-
- if (DEBUG_PROXIMITY_CHARS) {
- for (int i = 0; i < inputSize; ++i) {
- AKLOGI("---");
- for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL; ++j) {
- int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
- int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
- icc += 0;
- icfjc += 0;
- AKLOGI("--- (%d)%c,%c", i, icc, icfjc); AKLOGI("--- A<%d>,B<%d>", icc, icfjc);
- }
- }
- }
+ mProximityInfo->initializeProximities(inputCodes, xCoordinates, yCoordinates,
+ inputSize, mInputProximities);
}
///////////////////////
@@ -88,481 +61,249 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
int pushTouchPointStartIndex = 0;
int lastSavedInputSize = 0;
mMaxPointToKeyLength = maxPointToKeyLength;
- if (mIsContinuationPossible && mInputIndice.size() > 1) {
+ mSampledInputSize = 0;
+ mMostProbableStringProbability = 0.0f;
+
+ if (mIsContinuationPossible && mSampledInputIndice.size() > 1) {
// Just update difference.
- // Two points prior is never skipped. Thus, we pop 2 input point data here.
- pushTouchPointStartIndex = mInputIndice[mInputIndice.size() - 2];
- popInputData();
- popInputData();
- lastSavedInputSize = mInputXs.size();
+ // Previous two points are never skipped. Thus, we pop 2 input point data here.
+ pushTouchPointStartIndex = ProximityInfoStateUtils::trimLastTwoTouchPoints(
+ &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledLengthCache,
+ &mSampledInputIndice);
+ lastSavedInputSize = mSampledInputXs.size();
} else {
// Clear all data.
- mInputXs.clear();
- mInputYs.clear();
- mTimes.clear();
- mInputIndice.clear();
- mLengthCache.clear();
- mDistanceCache.clear();
- mNearKeysVector.clear();
- mRelativeSpeeds.clear();
+ mSampledInputXs.clear();
+ mSampledInputYs.clear();
+ mSampledTimes.clear();
+ mSampledInputIndice.clear();
+ mSampledLengthCache.clear();
+ mSampledDistanceCache_G.clear();
+ mSampledNearKeySets.clear();
+ mSampledSearchKeySets.clear();
+ mSpeedRates.clear();
+ mBeelineSpeedPercentiles.clear();
+ mCharProbabilities.clear();
+ mDirections.clear();
}
+
if (DEBUG_GEO_FULL) {
AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d",
pushTouchPointStartIndex, lastSavedInputSize);
}
- mInputSize = 0;
if (xCoordinates && yCoordinates) {
- const bool proximityOnly = !isGeometric && (xCoordinates[0] < 0 || yCoordinates[0] < 0);
- int lastInputIndex = pushTouchPointStartIndex;
- for (int i = lastInputIndex; i < inputSize; ++i) {
- const int pid = pointerIds ? pointerIds[i] : 0;
- if (pointerId == pid) {
- lastInputIndex = i;
- }
- }
- if (DEBUG_GEO_FULL) {
- AKLOGI("Init ProximityInfoState: last input index = %d", lastInputIndex);
- }
- // Working space to save near keys distances for current, prev and prevprev input point.
- NearKeysDistanceMap nearKeysDistances[3];
- // These pointers are swapped for each inputs points.
- NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0];
- NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1];
- NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2];
-
- for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) {
- // Assuming pointerId == 0 if pointerIds is null.
- const int pid = pointerIds ? pointerIds[i] : 0;
- if (DEBUG_GEO_FULL) {
- AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
- }
- if (pointerId == pid) {
- const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i);
- const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
- const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
- const int time = times ? times[i] : -1;
- if (pushTouchPoint(i, c, x, y, time, isGeometric /* do sampling */,
- i == lastInputIndex, currentNearKeysDistances, prevNearKeysDistances,
- prevPrevNearKeysDistances)) {
- // Previous point information was popped.
- NearKeysDistanceMap *tmp = prevNearKeysDistances;
- prevNearKeysDistances = currentNearKeysDistances;
- currentNearKeysDistances = tmp;
- } else {
- NearKeysDistanceMap *tmp = prevPrevNearKeysDistances;
- prevPrevNearKeysDistances = prevNearKeysDistances;
- prevNearKeysDistances = currentNearKeysDistances;
- currentNearKeysDistances = tmp;
- }
- }
- }
- mInputSize = mInputXs.size();
+ mSampledInputSize = ProximityInfoStateUtils::updateTouchPoints(mProximityInfo,
+ mMaxPointToKeyLength, mInputProximities, xCoordinates, yCoordinates, times,
+ pointerIds, inputSize, isGeometric, pointerId, pushTouchPointStartIndex,
+ &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledLengthCache,
+ &mSampledInputIndice);
}
- if (mInputSize > 0 && isGeometric) {
- int sumDuration = mTimes.back() - mTimes.front();
- int sumLength = mLengthCache.back() - mLengthCache.front();
- float averageSpeed = static_cast<float>(sumLength) / static_cast<float>(sumDuration);
- mRelativeSpeeds.resize(mInputSize);
- for (int i = lastSavedInputSize; i < mInputSize; ++i) {
- const int index = mInputIndice[i];
- int length = 0;
- int duration = 0;
- if (index == 0 && index < inputSize - 1) {
- length = getDistanceInt(xCoordinates[index], yCoordinates[index],
- xCoordinates[index + 1], yCoordinates[index + 1]);
- duration = times[index + 1] - times[index];
- } else if (index == inputSize - 1 && index > 0) {
- length = getDistanceInt(xCoordinates[index - 1], yCoordinates[index - 1],
- xCoordinates[index], yCoordinates[index]);
- duration = times[index] - times[index - 1];
- } else if (0 < index && index < inputSize - 1) {
- length = getDistanceInt(xCoordinates[index - 1], yCoordinates[index - 1],
- xCoordinates[index], yCoordinates[index])
- + getDistanceInt(xCoordinates[index], yCoordinates[index],
- xCoordinates[index + 1], yCoordinates[index + 1]);
- duration = times[index + 1] - times[index - 1];
- } else {
- length = 0;
- duration = 1;
- }
- const float speed = static_cast<float>(length) / static_cast<float>(duration);
- mRelativeSpeeds[i] = speed / averageSpeed;
- }
+ if (mSampledInputSize > 0 && isGeometric) {
+ mAverageSpeed = ProximityInfoStateUtils::refreshSpeedRates(inputSize, xCoordinates,
+ yCoordinates, times, lastSavedInputSize, mSampledInputSize, &mSampledInputXs,
+ &mSampledInputYs, &mSampledTimes, &mSampledLengthCache, &mSampledInputIndice,
+ &mSpeedRates, &mDirections);
+ ProximityInfoStateUtils::refreshBeelineSpeedRates(mProximityInfo->getMostCommonKeyWidth(),
+ mAverageSpeed, inputSize, xCoordinates, yCoordinates, times, mSampledInputSize,
+ &mSampledInputXs, &mSampledInputYs, &mSampledInputIndice,
+ &mBeelineSpeedPercentiles);
}
- if (mInputSize > 0) {
- const int keyCount = mProximityInfo->getKeyCount();
- mNearKeysVector.resize(mInputSize);
- mDistanceCache.resize(mInputSize * keyCount);
- for (int i = lastSavedInputSize; i < mInputSize; ++i) {
- mNearKeysVector[i].reset();
- static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f;
- for (int k = 0; k < keyCount; ++k) {
- const int index = i * keyCount + k;
- const int x = mInputXs[i];
- const int y = mInputYs[i];
- const float normalizedSquaredDistance =
- mProximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(k, x, y);
- mDistanceCache[index] = normalizedSquaredDistance;
- if (normalizedSquaredDistance < NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) {
- mNearKeysVector[i].set(k, 1);
- }
- }
- }
+ if (mSampledInputSize > 0) {
+ ProximityInfoStateUtils::initGeometricDistanceInfos(mProximityInfo, mSampledInputSize,
+ lastSavedInputSize, &mSampledInputXs, &mSampledInputYs, &mSampledNearKeySets,
+ &mSampledDistanceCache_G);
+ if (isGeometric) {
+ // updates probabilities of skipping or mapping each key for all points.
+ ProximityInfoStateUtils::updateAlignPointProbabilities(
+ mMaxPointToKeyLength, mProximityInfo->getMostCommonKeyWidth(),
+ mProximityInfo->getKeyCount(), lastSavedInputSize, mSampledInputSize,
+ &mSampledInputXs, &mSampledInputYs, &mSpeedRates, &mSampledLengthCache,
+ &mSampledDistanceCache_G, &mSampledNearKeySets, &mCharProbabilities);
+ ProximityInfoStateUtils::updateSampledSearchKeySets(mProximityInfo,
+ mSampledInputSize, lastSavedInputSize, &mSampledLengthCache,
+ &mSampledNearKeySets, &mSampledSearchKeySets,
+ &mSampledSearchKeyVectors);
+ mMostProbableStringProbability = ProximityInfoStateUtils::getMostProbableString(
+ mProximityInfo, mSampledInputSize, &mCharProbabilities, mMostProbableString);
- static const float READ_FORWORD_LENGTH_SCALE = 0.95f;
- const int readForwordLength = static_cast<int>(
- hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight())
- * READ_FORWORD_LENGTH_SCALE);
- for (int i = 0; i < mInputSize; ++i) {
- if (DEBUG_GEO_FULL) {
- AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i],
- mTimes[i]);
- }
- for (int j = max(i + 1, lastSavedInputSize); j < mInputSize; ++j) {
- if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) {
- break;
- }
- mNearKeysVector[i] |= mNearKeysVector[j];
- }
}
}
+ if (DEBUG_SAMPLING_POINTS) {
+ ProximityInfoStateUtils::dump(isGeometric, inputSize, xCoordinates, yCoordinates,
+ mSampledInputSize, &mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSpeedRates,
+ &mBeelineSpeedPercentiles);
+ }
// end
///////////////////////
- memset(mNormalizedSquaredDistances, NOT_A_DISTANCE, sizeof(mNormalizedSquaredDistances));
- memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
- mTouchPositionCorrectionEnabled = mInputSize > 0 && mHasTouchPositionCorrectionData
+ mTouchPositionCorrectionEnabled = mSampledInputSize > 0 && mHasTouchPositionCorrectionData
&& xCoordinates && yCoordinates;
if (!isGeometric && pointerId == 0) {
- for (int i = 0; i < inputSize; ++i) {
- mPrimaryInputWord[i] = getPrimaryCharAt(i);
- }
-
- for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
- const int *proximityChars = getProximityCharsAt(i);
- const int primaryKey = proximityChars[0];
- const int x = xCoordinates[i];
- const int y = yCoordinates[i];
- if (DEBUG_PROXIMITY_CHARS) {
- int a = x + y + primaryKey;
- a += 0;
- AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
- }
- for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
- const int currentChar = proximityChars[j];
- const float squaredDistance =
- hasInputCoordinates() ? calculateNormalizedSquaredDistance(
- mProximityInfo->getKeyIndexOf(currentChar), i) :
- NOT_A_DISTANCE_FLOAT;
- if (squaredDistance >= 0.0f) {
- mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
- (int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
- } else {
- mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
- (j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO :
- PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
- }
- if (DEBUG_PROXIMITY_CHARS) {
- AKLOGI("--- Proximity (%d) = %c", j, currentChar);
- }
- }
+ ProximityInfoStateUtils::initPrimaryInputWord(
+ inputSize, mInputProximities, mPrimaryInputWord);
+ if (mTouchPositionCorrectionEnabled) {
+ ProximityInfoStateUtils::initNormalizedSquaredDistances(
+ mProximityInfo, inputSize, xCoordinates, yCoordinates, mInputProximities,
+ &mSampledInputXs, &mSampledInputYs, mNormalizedSquaredDistances);
}
}
-
if (DEBUG_GEO_FULL) {
- AKLOGI("ProximityState init finished: %d points out of %d", mInputSize, inputSize);
+ AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize);
}
}
-bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSize,
- const int *const xCoordinates, const int *const yCoordinates, const int *const times) {
- for (int i = 0; i < mInputSize; ++i) {
- const int index = mInputIndice[i];
- if (index > inputSize || xCoordinates[index] != mInputXs[i] ||
- yCoordinates[index] != mInputYs[i] || times[index] != mTimes[i]) {
- return false;
- }
+// This function basically converts from a length to an edit distance. Accordingly, it's obviously
+// wrong to compare with mMaxPointToKeyLength.
+float ProximityInfoState::getPointToKeyLength(
+ const int inputIndex, const int codePoint) const {
+ const int keyId = mProximityInfo->getKeyIndexOf(codePoint); // NOT_AN_INDEX if not on the keyboard
+ if (keyId != NOT_AN_INDEX) {
+ const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; // flattened [input][key]
+ return min(mSampledDistanceCache_G[index], mMaxPointToKeyLength); // cached value, capped
}
- return true;
-}
-
-// Calculating point to key distance for all near keys and returning the distance between
-// the given point and the nearest key position.
-float ProximityInfoState::updateNearKeysDistances(const int x, const int y,
- NearKeysDistanceMap *const currentNearKeysDistances) {
- static const float NEAR_KEY_THRESHOLD = 4.0f;
-
- currentNearKeysDistances->clear();
- const int keyCount = mProximityInfo->getKeyCount();
- float nearestKeyDistance = mMaxPointToKeyLength;
- for (int k = 0; k < keyCount; ++k) {
- const float dist = mProximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(k, x, y);
- if (dist < NEAR_KEY_THRESHOLD) {
- currentNearKeysDistances->insert(std::pair<int, float>(k, dist));
- }
- if (nearestKeyDistance > dist) {
- nearestKeyDistance = dist;
- }
+ if (isSkippableCodePoint(codePoint)) {
+ return 0.0f; // skippable code points add no length
}
- return nearestKeyDistance;
+ // If the code point is neither a key on the keyboard nor skippable, return the max length.
+ return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
-// Check if previous point is at local minimum position to near keys.
-bool ProximityInfoState::isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances,
- const NearKeysDistanceMap *const prevNearKeysDistances,
- const NearKeysDistanceMap *const prevPrevNearKeysDistances) const {
- static const float MARGIN = 0.01f;
-
- for (NearKeysDistanceMap::const_iterator it = prevNearKeysDistances->begin();
- it != prevNearKeysDistances->end(); ++it) {
- NearKeysDistanceMap::const_iterator itPP = prevPrevNearKeysDistances->find(it->first);
- NearKeysDistanceMap::const_iterator itC = currentNearKeysDistances->find(it->first);
- if ((itPP == prevPrevNearKeysDistances->end() || itPP->second > it->second + MARGIN)
- && (itC == currentNearKeysDistances->end() || itC->second > it->second + MARGIN)) {
- return true;
- }
- }
- return false;
+float ProximityInfoState::getPointToKeyByIdLength(
+ const int inputIndex, const int keyId) const { // takes a key id rather than a code point
+ return ProximityInfoStateUtils::getPointToKeyByIdLength(mMaxPointToKeyLength, // delegates
+ &mSampledDistanceCache_G, mProximityInfo->getKeyCount(), inputIndex, keyId);
}
-// Calculating a point score that indicates usefulness of the point.
-float ProximityInfoState::getPointScore(
- const int x, const int y, const int time, const bool lastPoint, const float nearest,
- const NearKeysDistanceMap *const currentNearKeysDistances,
- const NearKeysDistanceMap *const prevNearKeysDistances,
- const NearKeysDistanceMap *const prevPrevNearKeysDistances) const {
- static const int DISTANCE_BASE_SCALE = 100;
- static const int SAVE_DISTANCE_SCALE = 200;
- static const int SKIP_DISTANCE_SCALE = 25;
- static const int CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE = 40;
- static const int STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE = 50;
- static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 27;
- static const float SAVE_DISTANCE_SCORE = 2.0f;
- static const float SKIP_DISTANCE_SCORE = -1.0f;
- static const float CHECK_LOCALMIN_DISTANCE_SCORE = -1.0f;
- static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F / 36.0f;
- static const float STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD = 0.5f;
- static const float STRAIGHT_SKIP_SCORE = -1.0f;
- static const float CORNER_ANGLE_THRESHOLD = M_PI_F / 2.0f;
- static const float CORNER_SCORE = 1.0f;
-
- const std::size_t size = mInputXs.size();
- if (size <= 1) {
- return 0.0f;
- }
- const int baseSampleRate = mProximityInfo->getMostCommonKeyWidth();
- const int distNext = getDistanceInt(x, y, mInputXs.back(), mInputYs.back())
- * DISTANCE_BASE_SCALE;
- const int distPrev = getDistanceInt(mInputXs.back(), mInputYs.back(),
- mInputXs[size - 2], mInputYs[size - 2]) * DISTANCE_BASE_SCALE;
- float score = 0.0f;
-
- // Sum of distances
- if (distPrev + distNext > baseSampleRate * SAVE_DISTANCE_SCALE) {
- score += SAVE_DISTANCE_SCORE;
- }
- // Distance
- if (distPrev < baseSampleRate * SKIP_DISTANCE_SCALE) {
- score += SKIP_DISTANCE_SCORE;
+// In the following function, codePoint is the current character of the dictionary word currently
+// examined. currentCodePoints is an array containing the keys close to the character the user
+// actually typed at the same position. We want to see if codePoint is in it: if so, then the word
+// contains at that position a character close to what the user typed.
+// What the user typed is actually the first code point of the array.
+// proximityIndex is a pointer to the variable where getProximityType returns the index of
+// codePoint in the proximity code points of the input index.
+// Notice : accented characters do not have a proximity list, so they are alone in their list. The
+// non-accented version of the character should be considered "close", but not the other keys close
+// to the non-accented version.
+ProximityType ProximityInfoState::getProximityType(const int index, const int codePoint,
+ const bool checkProximityChars, int *proximityIndex) const {
+ const int *currentCodePoints = getProximityCodePointsAt(index);
+ const int firstCodePoint = currentCodePoints[0];
+ const int baseLowerC = toBaseLowerCase(codePoint);
+
+ // The first char in the array is what user typed. If it matches right away, that means the
+ // user typed that same char for this pos.
+ if (firstCodePoint == baseLowerC || firstCodePoint == codePoint) {
+ return MATCH_CHAR;
}
- // Location
- if (distPrev < baseSampleRate * CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE) {
- if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances,
- prevPrevNearKeysDistances)) {
- score += CHECK_LOCALMIN_DISTANCE_SCORE;
- }
- }
- // Angle
- const float angle1 = getAngle(x, y, mInputXs.back(), mInputYs.back());
- const float angle2 = getAngle(mInputXs.back(), mInputYs.back(),
- mInputXs[size - 2], mInputYs[size - 2]);
- const float angleDiff = getAngleDiff(angle1, angle2);
- // Skip straight
- if (nearest > STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD
- && distPrev < baseSampleRate * STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE
- && angleDiff < STRAIGHT_ANGLE_THRESHOLD) {
- score += STRAIGHT_SKIP_SCORE;
- }
- // Save corner
- if (distPrev > baseSampleRate * CORNER_CHECK_DISTANCE_THRESHOLD_SCALE
- && angleDiff > CORNER_ANGLE_THRESHOLD) {
- score += CORNER_SCORE;
+
+ if (!checkProximityChars) return SUBSTITUTION_CHAR;
+
+ // If the non-accented, lowercased version of that first character matches c, then we have a
+ // non-accented version of the accented character the user typed. Treat it as a close char.
+ if (toBaseLowerCase(firstCodePoint) == baseLowerC) {
+ return PROXIMITY_CHAR;
}
- return score;
-}
-// Sampling touch point and pushing information to vectors.
-// Returning if previous point is popped or not.
-bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y,
- const int time, const bool sample, const bool isLastPoint,
- NearKeysDistanceMap *const currentNearKeysDistances,
- const NearKeysDistanceMap *const prevNearKeysDistances,
- const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
- static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f;
-
- size_t size = mInputXs.size();
- bool popped = false;
- if (nodeChar < 0 && sample) {
- const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
- const float score = getPointScore(x, y, time, isLastPoint, nearest,
- currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
- if (score < 0) {
- // Pop previous point because it would be useless.
- popInputData();
- size = mInputXs.size();
- popped = true;
- } else {
- popped = false;
- }
- // Check if the last point should be skipped.
- if (isLastPoint) {
- if (size > 0 && getDistanceFloat(x, y, mInputXs.back(), mInputYs.back())
- < mProximityInfo->getMostCommonKeyWidth() * LAST_POINT_SKIP_DISTANCE_SCALE) {
- if (DEBUG_GEO_FULL) {
- AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %f, "
- "width = %f", size, x, y, mInputXs.back(), mInputYs.back(),
- getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()),
- mProximityInfo->getMostCommonKeyWidth()
- * LAST_POINT_SKIP_DISTANCE_SCALE);
- }
- return popped;
- } else if (size > 1) {
- int minChar = 0;
- float minDist = mMaxPointToKeyLength;
- for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin();
- it != currentNearKeysDistances->end(); ++it) {
- if (minDist > it->second) {
- minChar = it->first;
- minDist = it->second;
- }
- }
- NearKeysDistanceMap::const_iterator itPP =
- prevNearKeysDistances->find(minChar);
- if (itPP != prevNearKeysDistances->end() && minDist > itPP->second) {
- if (DEBUG_GEO_FULL) {
- AKLOGI("p1: char = %c, minDist = %f, prevNear key minDist = %f",
- minChar, itPP->second, minDist);
- }
- return popped;
- }
+ // Not an exact nor an accent-alike match: search the list of close keys
+ int j = 1;
+ while (j < MAX_PROXIMITY_CHARS_SIZE
+ && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ const bool matched = (currentCodePoints[j] == baseLowerC
+ || currentCodePoints[j] == codePoint);
+ if (matched) {
+ if (proximityIndex) {
+ *proximityIndex = j;
}
+ return PROXIMITY_CHAR;
}
+ ++j;
}
-
- if (nodeChar >= 0 && (x < 0 || y < 0)) {
- const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
- if (keyId >= 0) {
- x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
- y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
+ if (j < MAX_PROXIMITY_CHARS_SIZE
+ && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ ++j;
+ while (j < MAX_PROXIMITY_CHARS_SIZE
+ && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ const bool matched = (currentCodePoints[j] == baseLowerC
+ || currentCodePoints[j] == codePoint);
+ if (matched) {
+ if (proximityIndex) {
+ *proximityIndex = j;
+ }
+ return ADDITIONAL_PROXIMITY_CHAR;
+ }
+ ++j;
}
}
+ // Was not included, signal this as a substitution character.
+ return SUBSTITUTION_CHAR;
+}
- // Pushing point information.
- if (size > 0) {
- mLengthCache.push_back(
- mLengthCache.back() + getDistanceInt(x, y, mInputXs.back(), mInputYs.back()));
- } else {
- mLengthCache.push_back(0);
+ProximityType ProximityInfoState::getProximityTypeG(const int index, const int codePoint) const {
+ if (!isUsed()) { // isUsed() is false while mSampledInputSize is 0: nothing to compare against
+ return UNRELATED_CHAR;
}
- mInputXs.push_back(x);
- mInputYs.push_back(y);
- mTimes.push_back(time);
- mInputIndice.push_back(inputIndex);
- if (DEBUG_GEO_FULL) {
- AKLOGI("pushTouchPoint: x = %03d, y = %03d, time = %d, index = %d, popped ? %01d",
- x, y, time, inputIndex, popped);
+ const int lowerCodePoint = toLowerCase(codePoint);
+ const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
+ for (int i = 0; i < static_cast<int>(mSampledSearchKeyVectors[index].size()); ++i) {
+ if (mSampledSearchKeyVectors[index][i] == lowerCodePoint
+ || mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) {
+ return MATCH_CHAR; // a search key of this sampled point equals the lower-cased or base form
+ }
}
- return popped;
+ return UNRELATED_CHAR; // only MATCH_CHAR or UNRELATED_CHAR is ever returned from here
}
-float ProximityInfoState::calculateNormalizedSquaredDistance(
- const int keyIndex, const int inputIndex) const {
- if (keyIndex == NOT_AN_INDEX) {
- return NOT_A_DISTANCE_FLOAT;
- }
- if (!mProximityInfo->hasSweetSpotData(keyIndex)) {
- return NOT_A_DISTANCE_FLOAT;
- }
- if (NOT_A_COORDINATE == mInputXs[inputIndex]) {
- return NOT_A_DISTANCE_FLOAT;
- }
- const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(
- keyIndex, inputIndex);
- const float squaredRadius = square(mProximityInfo->getSweetSpotRadiiAt(keyIndex));
- return squaredDistance / squaredRadius;
+// Tests keyId against the search key set recorded for the sampled input point at index.
+// Both arguments are only validated through ASSERT.
+bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const {
+    ASSERT(keyId >= 0 && index >= 0 && index < mSampledInputSize);
+    const bool isSearchKey = mSampledSearchKeySets[index].test(keyId);
+    return isSearchKey;
}
-int ProximityInfoState::getDuration(const int index) const {
- if (index >= 0 && index < mInputSize - 1) {
- return mTimes[index + 1] - mTimes[index];
- }
- return 0;
+// Thin wrapper: hands the sampled x/y coordinate vectors and the two sampled-input indices to
+// ProximityInfoStateUtils::getDirection() and returns its result unchanged.
+float ProximityInfoState::getDirection(const int index0, const int index1) const {
+    const std::vector<int> *const sampledXs = &mSampledInputXs;
+    const std::vector<int> *const sampledYs = &mSampledInputYs;
+    return ProximityInfoStateUtils::getDirection(sampledXs, sampledYs, index0, index1);
}
-float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint,
- const float scale) const {
- const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
- if (keyId != NOT_AN_INDEX) {
- const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
- return min(mDistanceCache[index] * scale, mMaxPointToKeyLength);
+float ProximityInfoState::getLineToKeyDistance(
+ const int from, const int to, const int keyId, const bool extend) const {
+ if (from < 0 || from > mSampledInputSize - 1) { // `from` is not a valid sampled point index
+ return 0.0f;
}
- if (isSkippableChar(codePoint)) {
+ if (to < 0 || to > mSampledInputSize - 1) { // `to` is not a valid sampled point index
return 0.0f;
}
- // If the char is not a key on the keyboard then return the max length.
- return MAX_POINT_TO_KEY_LENGTH;
-}
+ const int x0 = mSampledInputXs[from];
+ const int y0 = mSampledInputYs[from];
+ const int x1 = mSampledInputXs[to];
+ const int y1 = mSampledInputYs[to];
-int ProximityInfoState::getSpaceY() const {
- const int keyId = mProximityInfo->getKeyIndexOf(' ');
- return mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
-}
+ const int keyX = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); // NOTE(review): keyId is not range-checked here
+ const int keyY = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
-float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(
- const int keyIndex, const int inputIndex) const {
- const float sweetSpotCenterX = mProximityInfo->getSweetSpotCenterXAt(keyIndex);
- const float sweetSpotCenterY = mProximityInfo->getSweetSpotCenterYAt(keyIndex);
- const float inputX = static_cast<float>(mInputXs[inputIndex]);
- const float inputY = static_cast<float>(mInputYs[inputIndex]);
- return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY);
+ return ProximityInfoUtils::pointToLineSegSquaredDistanceFloat(
+ keyX, keyY, x0, y0, x1, y1, extend); // NOTE(review): helper name says *squared* distance -- confirm callers expect that
}
-// Puts possible characters into filter and returns new filter size.
-int32_t ProximityInfoState::getAllPossibleChars(
- const size_t index, int32_t *const filter, const int32_t filterSize) const {
- if (index >= mInputXs.size()) {
- return filterSize;
- }
- int newFilterSize = filterSize;
- for (int j = 0; j < mProximityInfo->getKeyCount(); ++j) {
- if (mNearKeysVector[index].test(j)) {
- const int32_t keyCodePoint = mProximityInfo->getCodePointOf(j);
- bool insert = true;
- // TODO: Avoid linear search
- for (int k = 0; k < filterSize; ++k) {
- if (filter[k] == keyCodePoint) {
- insert = false;
- break;
- }
- }
- if (insert) {
- filter[newFilterSize++] = keyCodePoint;
- }
- }
- }
- return newFilterSize;
+// Copies the entire mMostProbableString array into codePointBuf and returns
+// mMostProbableStringProbability. The copy is always sizeof(mMostProbableString) bytes, so the
+// caller's buffer must be at least that large.
+float ProximityInfoState::getMostProbableString(int *const codePointBuf) const {
+    const size_t byteCount = sizeof(mMostProbableString);
+    memcpy(codePointBuf, mMostProbableString, byteCount);
+    return mMostProbableStringProbability;
}
-void ProximityInfoState::popInputData() {
- mInputXs.pop_back();
- mInputYs.pop_back();
- mTimes.pop_back();
- mLengthCache.pop_back();
- mInputIndice.pop_back();
+// Asks mProximityInfo whether the sampled input point at index has space proximity, passing
+// that point's sampled x/y coordinates. index is only validated through ASSERT.
+bool ProximityInfoState::hasSpaceProximity(const int index) const {
+    ASSERT(0 <= index && index < mSampledInputSize);
+    const int sampledX = getInputX(index);
+    const int sampledY = getInputY(index);
+    return mProximityInfo->hasSpaceProximity(sampledX, sampledY);
}
+// Looks up keyIndex in the per-point map mCharProbabilities[index] and returns the stored value.
+// When the map has no entry for keyIndex, MAX_VALUE_FOR_WEIGHTING (as a float) is returned instead.
+float ProximityInfoState::getProbability(const int index, const int keyIndex) const {
+    ASSERT(0 <= index && index < mSampledInputSize);
+    const hash_map_compat<int, float> &probabilities = mCharProbabilities[index];
+    const hash_map_compat<int, float>::const_iterator found = probabilities.find(keyIndex);
+    if (found == probabilities.end()) {
+        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+    }
+    return found->second;
+}
} // namespace latinime
diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h
index c1ec76c38..224240b00 100644
--- a/native/jni/src/proximity_info_state.h
+++ b/native/jni/src/proximity_info_state.h
@@ -17,15 +17,14 @@
#ifndef LATINIME_PROXIMITY_INFO_STATE_H
#define LATINIME_PROXIMITY_INFO_STATE_H
-#include <bitset>
#include <cstring> // for memset()
-#include <stdint.h>
-#include <string>
#include <vector>
#include "char_utils.h"
#include "defines.h"
#include "hash_map_compat.h"
+#include "proximity_info_params.h"
+#include "proximity_info_state_utils.h"
namespace latinime {
@@ -33,46 +32,60 @@ class ProximityInfo;
class ProximityInfoState {
public:
- typedef std::bitset<MAX_KEY_COUNT_IN_A_KEYBOARD> NearKeycodesSet;
- static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
- static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
- static const float NOT_A_DISTANCE_FLOAT;
- static const int NOT_A_CODE;
-
/////////////////////////////////////////
// Defined in proximity_info_state.cpp //
/////////////////////////////////////////
void initInputParams(const int pointerId, const float maxPointToKeyLength,
- const ProximityInfo *proximityInfo, const int32_t *const inputCodes,
+ const ProximityInfo *proximityInfo, const int *const inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates,
const int *const times, const int *const pointerIds, const bool isGeometric);
/////////////////////////////////////////
// Defined here //
/////////////////////////////////////////
- ProximityInfoState()
- : mProximityInfo(0), mMaxPointToKeyLength(0),
- mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(),
+ AK_FORCE_INLINE ProximityInfoState()
+ : mProximityInfo(0), mMaxPointToKeyLength(0.0f), mAverageSpeed(0.0f),
+ mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0),
mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0),
- mIsContinuationPossible(false), mInputXs(), mInputYs(), mTimes(), mInputIndice(),
- mDistanceCache(), mLengthCache(), mRelativeSpeeds(), mNearKeysVector(),
- mTouchPositionCorrectionEnabled(false), mInputSize(0) {
- memset(mInputCodes, 0, sizeof(mInputCodes));
+ mIsContinuationPossible(false), mSampledInputXs(), mSampledInputYs(), mSampledTimes(),
+ mSampledInputIndice(), mSampledLengthCache(), mBeelineSpeedPercentiles(),
+ mSampledDistanceCache_G(), mSpeedRates(), mDirections(), mCharProbabilities(),
+ mSampledNearKeySets(), mSampledSearchKeySets(), mSampledSearchKeyVectors(),
+ mTouchPositionCorrectionEnabled(false), mSampledInputSize(0),
+ mMostProbableStringProbability(0.0f) {
+ memset(mInputProximities, 0, sizeof(mInputProximities));
memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances));
memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
+ memset(mMostProbableString, 0, sizeof(mMostProbableString));
}
- virtual ~ProximityInfoState() {}
+ // Non virtual inline destructor -- never inherit this class
+ AK_FORCE_INLINE ~ProximityInfoState() {}
- inline unsigned short getPrimaryCharAt(const int index) const {
- return getProximityCharsAt(index)[0];
+ inline int getPrimaryCodePointAt(const int index) const {
+ return getProximityCodePointsAt(index)[0];
}
- inline bool existsCharInProximityAt(const int index, const int c) const {
- const int *chars = getProximityCharsAt(index);
+    // Returns true when word has exactly mSampledInputSize code points and each one equals the
+    // first entry of the matching row of mInputProximities (rows are MAX_PROXIMITY_CHARS_SIZE
+    // wide).
+    inline bool sameAsTyped(const int *word, int length) const {
+        if (length != mSampledInputSize) {
+            return false;
+        }
+        for (int pos = 0; pos < length; ++pos) {
+            const int typedCodePoint = mInputProximities[pos * MAX_PROXIMITY_CHARS_SIZE];
+            if (typedCodePoint != word[pos]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+ // Returns true if c appears among the proximity code points stored for input position index.
+ // The scan stops at the first non-positive entry or at the end of the row, whichever comes first.
+ AK_FORCE_INLINE bool existsCodePointInProximityAt(const int index, const int c) const {
+ const int *codePoints = getProximityCodePointsAt(index);
int i = 0;
- while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
- if (chars[i++] == c) {
+ // Check the bound before dereferencing: a full row has no terminator, so testing
+ // codePoints[i] first would read one element past the row (and past mInputProximities for
+ // the last row). && short-circuits, so the access is skipped once i reaches the row size.
+ while (i < MAX_PROXIMITY_CHARS_SIZE && codePoints[i] > 0) {
+ if (codePoints[i++] == c) {
return true;
}
}
@@ -80,91 +93,27 @@ class ProximityInfoState {
}
inline bool existsAdjacentProximityChars(const int index) const {
- if (index < 0 || index >= mInputSize) return false;
- const int currentChar = getPrimaryCharAt(index);
+ if (index < 0 || index >= mSampledInputSize) return false;
+ const int currentCodePoint = getPrimaryCodePointAt(index); // first proximity entry, i.e. what was typed at index
const int leftIndex = index - 1; // previous input position, if any
- if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
+ if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
return true;
}
const int rightIndex = index + 1; // next input position, if any
- if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) {
+ if (rightIndex < mSampledInputSize
+ && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
return true;
}
return false; // the typed code point is in neither neighbour's proximity list
}
- // In the following function, c is the current character of the dictionary word
- // currently examined.
- // currentChars is an array containing the keys close to the character the
- // user actually typed at the same position. We want to see if c is in it: if so,
- // then the word contains at that position a character close to what the user
- // typed.
- // What the user typed is actually the first character of the array.
- // proximityIndex is a pointer to the variable where getMatchedProximityId returns
- // the index of c in the proximity chars of the input index.
- // Notice : accented characters do not have a proximity list, so they are alone
- // in their list. The non-accented version of the character should be considered
- // "close", but not the other keys close to the non-accented version.
- inline ProximityType getMatchedProximityId(const int index,
- const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
- const int *currentChars = getProximityCharsAt(index);
- const int firstChar = currentChars[0];
- const unsigned short baseLowerC = toBaseLowerCase(c);
-
- // The first char in the array is what user typed. If it matches right away,
- // that means the user typed that same char for this pos.
- if (firstChar == baseLowerC || firstChar == c) {
- return EQUIVALENT_CHAR;
- }
-
- if (!checkProximityChars) return UNRELATED_CHAR;
-
- // If the non-accented, lowercased version of that first character matches c,
- // then we have a non-accented version of the accented character the user
- // typed. Treat it as a close char.
- if (toBaseLowerCase(firstChar) == baseLowerC)
- return NEAR_PROXIMITY_CHAR;
-
- // Not an exact nor an accent-alike match: search the list of close keys
- int j = 1;
- while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
- && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
- if (matched) {
- if (proximityIndex) {
- *proximityIndex = j;
- }
- return NEAR_PROXIMITY_CHAR;
- }
- ++j;
- }
- if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
- && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- ++j;
- while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
- && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
- if (matched) {
- if (proximityIndex) {
- *proximityIndex = j;
- }
- return ADDITIONAL_PROXIMITY_CHAR;
- }
- ++j;
- }
- }
-
- // Was not included, signal this as an unrelated character.
- return UNRELATED_CHAR;
- }
-
inline int getNormalizedSquaredDistance(
const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[
- inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
+ inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
}
- inline const unsigned short *getPrimaryInputWord() const {
+ inline const int *getPrimaryInputWord() const {
return mPrimaryInputWord;
}
@@ -172,60 +121,81 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled;
}
- inline bool sameAsTyped(const unsigned short *word, int length) const {
- if (length != mInputSize) {
- return false;
- }
- const int *inputCodes = mInputCodes;
- while (length--) {
- if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
- return false;
- }
- inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
- word++;
- }
- return true;
- }
-
- int getDuration(const int index) const;
-
bool isUsed() const {
- return mInputSize > 0;
+ return mSampledInputSize > 0;
}
- uint32_t size() const {
- return mInputSize;
+ int size() const {
+ return mSampledInputSize;
}
int getInputX(const int index) const {
- return mInputXs[index];
+ return mSampledInputXs[index];
}
int getInputY(const int index) const {
- return mInputYs[index];
+ return mSampledInputYs[index];
}
+ bool hasSpaceProximity(const int index) const;
+
int getLengthCache(const int index) const {
- return mLengthCache[index];
+ return mSampledLengthCache[index];
}
bool isContinuationPossible() const {
return mIsContinuationPossible;
}
- float getPointToKeyLength(const int inputIndex, const int charCode, const float scale) const;
+ float getPointToKeyByIdLength(const int inputIndex, const int keyId) const;
+ float getPointToKeyLength(const int inputIndex, const int codePoint) const;
+
+ ProximityType getProximityType(const int index, const int codePoint,
+ const bool checkProximityChars, int *proximityIndex = 0) const;
+
+ ProximityType getProximityTypeG(const int index, const int codePoint) const;
+
+ const std::vector<int> *getSearchKeyVector(const int index) const {
+ return &mSampledSearchKeyVectors[index];
+ }
+
+ float getSpeedRate(const int index) const {
+ return mSpeedRates[index];
+ }
- int getSpaceY() const;
+ AK_FORCE_INLINE int getBeelineSpeedPercentile(const int id) const {
+ return mBeelineSpeedPercentiles[id];
+ }
- int32_t getAllPossibleChars(
- const size_t startIndex, int32_t *const filter, const int32_t filterSize) const;
+    // Maps the beeline speed percentile stored for id to a DoubleLetterLevel:
+    // exactly 0 -> A_STRONG_DOUBLE_LETTER; below
+    // ProximityInfoParams::MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE -> A_DOUBLE_LETTER;
+    // anything else -> NOT_A_DOUBLE_LETTER.
+    AK_FORCE_INLINE DoubleLetterLevel getDoubleLetterLevel(const int id) const {
+        const int percentile = getBeelineSpeedPercentile(id);
+        if (percentile == 0) {
+            return A_STRONG_DOUBLE_LETTER;
+        }
+        if (percentile < ProximityInfoParams::MIN_DOUBLE_LETTER_BEELINE_SPEED_PERCENTILE) {
+            return A_DOUBLE_LETTER;
+        }
+        return NOT_A_DOUBLE_LETTER;
+    }
- float getRelativeSpeed(const int index) const {
- return mRelativeSpeeds[index];
+ float getDirection(const int index) const {
+ return mDirections[index];
}
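+ // Direction between two sampled input points. Despite the parameter names x and y below, the two arguments are indices into the sampled input (index0, index1 in the definition), not coordinates.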
+ float getDirection(const int x, const int y) const;
+
+ float getMostProbableString(int *const codePointBuf) const;
+
+ float getProbability(const int index, const int charCode) const;
+
+ float getLineToKeyDistance(
+ const int from, const int to, const int keyId, const bool extend) const;
+
+ bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const;
+
private:
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
- typedef hash_map_compat<int, float> NearKeysDistanceMap;
/////////////////////////////////////////
// Defined in proximity_info_state.cpp //
/////////////////////////////////////////
@@ -234,44 +204,20 @@ class ProximityInfoState {
float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const;
- bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time,
- const bool sample, const bool isLastPoint,
- NearKeysDistanceMap *const currentNearKeysDistances,
- const NearKeysDistanceMap *const prevNearKeysDistances,
- const NearKeysDistanceMap *const prevPrevNearKeysDistances);
/////////////////////////////////////////
// Defined here //
/////////////////////////////////////////
- inline float square(const float x) const { return x * x; }
- bool hasInputCoordinates() const {
- return mInputXs.size() > 0 && mInputYs.size() > 0;
+ inline const int *getProximityCodePointsAt(const int index) const {
+ return ProximityInfoStateUtils::getProximityCodePointsAt(mInputProximities, index);
}
- inline const int *getProximityCharsAt(const int index) const {
- return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
- }
-
- float updateNearKeysDistances(const int x, const int y,
- NearKeysDistanceMap *const currentNearKeysDistances);
- bool isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances,
- const NearKeysDistanceMap *const prevNearKeysDistances,
- const NearKeysDistanceMap *const prevPrevNearKeysDistances) const;
- float getPointScore(
- const int x, const int y, const int time, const bool last, const float nearest,
- const NearKeysDistanceMap *const currentNearKeysDistances,
- const NearKeysDistanceMap *const prevNearKeysDistances,
- const NearKeysDistanceMap *const prevPrevNearKeysDistances) const;
- bool checkAndReturnIsContinuationPossible(const int inputSize, const int *const xCoordinates,
- const int *const yCoordinates, const int *const times);
- void popInputData();
-
// const
const ProximityInfo *mProximityInfo;
float mMaxPointToKeyLength;
+ float mAverageSpeed;
bool mHasTouchPositionCorrectionData;
int mMostCommonKeyWidthSquare;
- std::string mLocaleStr;
int mKeyCount;
int mCellHeight;
int mCellWidth;
@@ -279,19 +225,34 @@ class ProximityInfoState {
int mGridWidth;
bool mIsContinuationPossible;
- std::vector<int> mInputXs;
- std::vector<int> mInputYs;
- std::vector<int> mTimes;
- std::vector<int> mInputIndice;
- std::vector<float> mDistanceCache;
- std::vector<int> mLengthCache;
- std::vector<float> mRelativeSpeeds;
- std::vector<NearKeycodesSet> mNearKeysVector;
+ std::vector<int> mSampledInputXs;
+ std::vector<int> mSampledInputYs;
+ std::vector<int> mSampledTimes;
+ std::vector<int> mSampledInputIndice;
+ std::vector<int> mSampledLengthCache;
+ std::vector<int> mBeelineSpeedPercentiles;
+ std::vector<float> mSampledDistanceCache_G;
+ std::vector<float> mSpeedRates;
+ std::vector<float> mDirections;
+ // probabilities of skipping or mapping to a key for each point.
+ std::vector<hash_map_compat<int, float> > mCharProbabilities;
+ // The vector for the key code set which holds nearby keys for each sampled input point
+ // 1. Used to calculate the probability of the key
+ // 2. Used to calculate mSampledSearchKeySets
+ std::vector<ProximityInfoStateUtils::NearKeycodesSet> mSampledNearKeySets;
+ // The vector for the key code set which holds nearby keys of some trailing sampled input points
+ // for each sampled input point. These nearby keys contain the next characters which can be in
+ // the dictionary. Specifically, currently we are looking for keys nearby trailing sampled
+ // inputs including the current input point.
+ std::vector<ProximityInfoStateUtils::NearKeycodesSet> mSampledSearchKeySets;
+ std::vector<std::vector<int> > mSampledSearchKeyVectors;
bool mTouchPositionCorrectionEnabled;
- int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
- int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
- int mInputSize;
- unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
+ int mInputProximities[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];
+ int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH];
+ int mSampledInputSize;
+ int mPrimaryInputWord[MAX_WORD_LENGTH];
+ float mMostProbableStringProbability;
+ int mMostProbableString[MAX_WORD_LENGTH];
};
} // namespace latinime
#endif // LATINIME_PROXIMITY_INFO_STATE_H
diff --git a/native/jni/src/proximity_info_state_utils.cpp b/native/jni/src/proximity_info_state_utils.cpp
new file mode 100644
index 000000000..ccb28bc8c
--- /dev/null
+++ b/native/jni/src/proximity_info_state_utils.cpp
@@ -0,0 +1,1073 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cmath>
+#include <cstring> // for memset()
+#include <sstream> // for debug prints
+#include <vector>
+
+#include "defines.h"
+#include "geometry_utils.h"
+#include "proximity_info.h"
+#include "proximity_info_params.h"
+#include "proximity_info_state_utils.h"
+
+namespace latinime {
+
+// Removes the two newest sampled points from all per-point vectors.
+// Returns the raw input index that was stored for the older of the two removed points.
+// At least two sampled points must exist when this is called.
+/* static */ int ProximityInfoStateUtils::trimLastTwoTouchPoints(std::vector<int> *sampledInputXs,
+        std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+        std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice) {
+    static const int POINTS_TO_TRIM = 2;
+    // Read the index before popping; it belongs to the second-to-last sampled point.
+    const int resumeIndex = *(sampledInputIndice->end() - POINTS_TO_TRIM);
+    for (int trimmed = 0; trimmed < POINTS_TO_TRIM; ++trimmed) {
+        popInputData(sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache,
+                sampledInputIndice);
+    }
+    return resumeIndex;
+}
+
+/**
+ * Feeds raw touch points of one pointer into the sampled-point vectors.
+ *
+ * Every input index from pushTouchPointStartIndex up to the last index that belongs to
+ * pointerId is handed to pushTouchPoint(); inputs of other pointers are skipped. When
+ * pointerIds is null every input is treated as pointer 0.
+ *
+ * For non-geometric input whose first coordinate is negative ("proximity only"), x and y are
+ * replaced by NOT_A_COORDINATE. For geometric input the code point passed on is
+ * NOT_A_COORDINATE; otherwise it is the primary code point of inputProximities at that index.
+ * A null times array is passed on as time -1.
+ *
+ * Returns the number of sampled points after the update (sampledInputXs->size()).
+ *
+ * NOTE(review): inputXCoordinates[0] / inputYCoordinates[0] are read unconditionally for
+ * non-geometric input, and the angle accumulation reads raw inputs i - 2 and i - 1 without
+ * checking their pointer id -- confirm that callers guarantee valid data for both.
+ */ /* static */ int ProximityInfoStateUtils::updateTouchPoints(
+ const ProximityInfo *const proximityInfo, const int maxPointToKeyLength,
+ const int *const inputProximities, const int *const inputXCoordinates,
+ const int *const inputYCoordinates, const int *const times, const int *const pointerIds,
+ const int inputSize, const bool isGeometric, const int pointerId,
+ const int pushTouchPointStartIndex, std::vector<int> *sampledInputXs,
+ std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+ std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice) {
+ if (DEBUG_SAMPLING_POINTS) {
+ if (times) {
+ for (int i = 0; i < inputSize; ++i) {
+ AKLOGI("(%d) x %d, y %d, time %d",
+ i, inputXCoordinates[i], inputYCoordinates[i], times[i]);
+ }
+ }
+ }
+#ifdef DO_ASSERT_TEST
+ if (times) { // Sanity check: timestamps must never decrease.
+ for (int i = 0; i < inputSize; ++i) {
+ if (i > 0) {
+ if (times[i] < times[i - 1]) {
+ AKLOGI("Invalid time sequence. %d, %d", times[i - 1], times[i]);
+ ASSERT(false);
+ }
+ }
+ }
+ }
+#endif
+ const bool proximityOnly = !isGeometric
+ && (inputXCoordinates[0] < 0 || inputYCoordinates[0] < 0);
+ int lastInputIndex = pushTouchPointStartIndex; // Becomes the last index owned by pointerId.
+ for (int i = lastInputIndex; i < inputSize; ++i) {
+ const int pid = pointerIds ? pointerIds[i] : 0;
+ if (pointerId == pid) {
+ lastInputIndex = i;
+ }
+ }
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: last input index = %d", lastInputIndex);
+ }
+ // Working space to save near keys distances for current, prev and prevprev input point.
+ NearKeysDistanceMap nearKeysDistances[3];
+ // These pointers are swapped for each inputs points.
+ NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0];
+ NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1];
+ NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2];
+ // "sumAngle" is accumulated by each angle of input points. And when "sumAngle" exceeds
+ // the threshold we save that point, reset sumAngle. This aims to keep the figure of
+ // the curve.
+ float sumAngle = 0.0f;
+
+ for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) {
+ // Assuming pointerId == 0 if pointerIds is null.
+ const int pid = pointerIds ? pointerIds[i] : 0;
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
+ }
+ if (pointerId == pid) {
+ const int c = isGeometric ?
+ NOT_A_COORDINATE : getPrimaryCodePointAt(inputProximities, i);
+ const int x = proximityOnly ? NOT_A_COORDINATE : inputXCoordinates[i];
+ const int y = proximityOnly ? NOT_A_COORDINATE : inputYCoordinates[i];
+ const int time = times ? times[i] : -1;
+
+ if (i > 1) { // Two preceding raw inputs are needed to measure a change of direction.
+ const float prevAngle = getAngle(
+ inputXCoordinates[i - 2], inputYCoordinates[i - 2],
+ inputXCoordinates[i - 1], inputYCoordinates[i - 1]);
+ const float currentAngle = getAngle(
+ inputXCoordinates[i - 1], inputYCoordinates[i - 1], x, y);
+ sumAngle += getAngleDiff(prevAngle, currentAngle);
+ }
+
+ if (pushTouchPoint(proximityInfo, maxPointToKeyLength, i, c, x, y, time,
+ isGeometric /* doSampling */, i == lastInputIndex, sumAngle,
+ currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances,
+ sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache,
+ sampledInputIndice)) {
+ /* Previous point information was popped: only current and prev trade places,
+  * prevPrev stays where it is and sumAngle keeps accumulating. */
+ NearKeysDistanceMap *tmp = prevNearKeysDistances;
+ prevNearKeysDistances = currentNearKeysDistances;
+ currentNearKeysDistances = tmp;
+ } else { // Nothing was popped: rotate all three maps and restart the angle sum.
+ NearKeysDistanceMap *tmp = prevPrevNearKeysDistances;
+ prevPrevNearKeysDistances = prevNearKeysDistances;
+ prevNearKeysDistances = currentNearKeysDistances;
+ currentNearKeysDistances = tmp;
+ sumAngle = 0.0f;
+ }
+ }
+ }
+ return sampledInputXs->size();
+}
+
+// Returns a pointer to the group of MAX_PROXIMITY_CHARS_SIZE code points stored for the input at
+// the given index inside the flat inputProximities array.
+/* static */ const int *ProximityInfoStateUtils::getProximityCodePointsAt(
+        const int *const inputProximities, const int index) {
+    return &inputProximities[index * MAX_PROXIMITY_CHARS_SIZE];
+}
+
+// Returns the first code point of the proximity group stored for the input at the given index.
+/* static */ int ProximityInfoStateUtils::getPrimaryCodePointAt(const int *const inputProximities,
+        const int index) {
+    const int *const codePoints = getProximityCodePointsAt(inputProximities, index);
+    return *codePoints;
+}
+
+// Fills primaryInputWord with the primary code point of each input.
+// The destination is treated as an array of MAX_WORD_LENGTH entries: it is zeroed first and then
+// at most MAX_WORD_LENGTH primary code points are copied, so an oversized inputSize can no longer
+// write past the end of the buffer.
+/* static */ void ProximityInfoStateUtils::initPrimaryInputWord(const int inputSize,
+        const int *const inputProximities, int *primaryInputWord) {
+    memset(primaryInputWord, 0, sizeof(primaryInputWord[0]) * MAX_WORD_LENGTH);
+    // Never copy more entries than were cleared above.
+    const int copyCount = (inputSize < MAX_WORD_LENGTH) ? inputSize : MAX_WORD_LENGTH;
+    for (int i = 0; i < copyCount; ++i) {
+        primaryInputWord[i] = getPrimaryCodePointAt(inputProximities, i);
+    }
+}
+
+// Returns the squared distance between the sampled input point at inputIndex and the sweet spot
+// center of the key at keyIndex.
+/* static */ float ProximityInfoStateUtils::calculateSquaredDistanceFromSweetSpotCenter(
+        const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int keyIndex, const int inputIndex) {
+    const float centerX = proximityInfo->getSweetSpotCenterXAt(keyIndex);
+    const float centerY = proximityInfo->getSweetSpotCenterYAt(keyIndex);
+    const float deltaX = static_cast<float>((*sampledInputXs)[inputIndex]) - centerX;
+    const float deltaY = static_cast<float>((*sampledInputYs)[inputIndex]) - centerY;
+    return SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY);
+}
+
+// Returns the squared distance from the sampled input point to the sweet spot center of the key,
+// divided by the squared sweet spot radius of that key.
+// Returns ProximityInfoParams::NOT_A_DISTANCE_FLOAT when keyIndex is NOT_AN_INDEX, when the key
+// has no sweet spot data, or when the sampled x coordinate is NOT_A_COORDINATE.
+/* static */ float ProximityInfoStateUtils::calculateNormalizedSquaredDistance(
+        const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int keyIndex, const int inputIndex) {
+    // The checks are evaluated left to right, so the key is only queried when it is valid.
+    if (keyIndex == NOT_AN_INDEX
+            || !proximityInfo->hasSweetSpotData(keyIndex)
+            || NOT_A_COORDINATE == (*sampledInputXs)[inputIndex]) {
+        return ProximityInfoParams::NOT_A_DISTANCE_FLOAT;
+    }
+    const float squaredRadius = SQUARE_FLOAT(proximityInfo->getSweetSpotRadiiAt(keyIndex));
+    return calculateSquaredDistanceFromSweetSpotCenter(proximityInfo, sampledInputXs,
+            sampledInputYs, keyIndex, inputIndex) / squaredRadius;
+}
+
+// Fills normalizedSquaredDistances, a flat array of MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH
+// entries, with one value per (input, proximity character) pair.
+//  - Every entry starts as NOT_A_DISTANCE.
+//  - For each input i and each proximity code point j (the list stops at the first code point
+//    that is not positive), the entry becomes the normalized squared distance scaled by
+//    NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR when a distance is available, and otherwise
+//    MATCH_CHAR_WITHOUT_DISTANCE_INFO for the primary character (j == 0) or
+//    PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO for the others.
+// NOTE(review): sampled coordinates are indexed with the raw input index i; this presumably
+// relies on the sampled vectors holding one point per input in this code path -- confirm.
+/* static */ void ProximityInfoStateUtils::initNormalizedSquaredDistances(
+        const ProximityInfo *const proximityInfo, const int inputSize, const int *inputXCoordinates,
+        const int *inputYCoordinates, const int *const inputProximities,
+        const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs,
+        int *normalizedSquaredDistances) {
+    // memset() fills bytes, so it can only produce an int value whose bytes are all equal.
+    // Assign element-wise so that the array really holds NOT_A_DISTANCE whatever its value is.
+    const int totalSlots = MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH;
+    for (int slot = 0; slot < totalSlots; ++slot) {
+        normalizedSquaredDistances[slot] = NOT_A_DISTANCE;
+    }
+    const bool hasInputCoordinates = sampledInputXs->size() > 0 && sampledInputYs->size() > 0;
+    for (int i = 0; i < inputSize; ++i) {
+        const int *proximityCodePoints = getProximityCodePointsAt(inputProximities, i);
+        const int primaryKey = proximityCodePoints[0];
+        const int x = inputXCoordinates[i];
+        const int y = inputYCoordinates[i];
+        if (DEBUG_PROXIMITY_CHARS) {
+            // "a" only exists to keep x, y and primaryKey referenced.
+            int a = x + y + primaryKey;
+            a += 0;
+            AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
+        }
+        for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityCodePoints[j] > 0; ++j) {
+            const int currentCodePoint = proximityCodePoints[j];
+            const float squaredDistance =
+                    hasInputCoordinates ? calculateNormalizedSquaredDistance(
+                            proximityInfo, sampledInputXs, sampledInputYs,
+                            proximityInfo->getKeyIndexOf(currentCodePoint), i) :
+                            ProximityInfoParams::NOT_A_DISTANCE_FLOAT;
+            if (squaredDistance >= 0.0f) {
+                normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
+                        static_cast<int>(squaredDistance
+                                * ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
+            } else {
+                // No usable distance: fall back to the marker for "matched without distance".
+                normalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
+                        (j == 0) ? MATCH_CHAR_WITHOUT_DISTANCE_INFO :
+                                PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
+            }
+            if (DEBUG_PROXIMITY_CHARS) {
+                AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
+            }
+        }
+    }
+}
+
+// Resizes the near-key sets and the distance cache to the current number of sampled points and,
+// for every sampled point from lastSavedInputSize on, stores the normalized squared distance to
+// each key. The cache is laid out as [sampledPoint * keyCount + key]. A key is flagged in the
+// point's near-key set when its distance is below NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD.
+/* static */ void ProximityInfoStateUtils::initGeometricDistanceInfos(
+        const ProximityInfo *const proximityInfo, const int sampledInputSize,
+        const int lastSavedInputSize, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        std::vector<NearKeycodesSet> *SampledNearKeySets,
+        std::vector<float> *SampledDistanceCache_G) {
+    SampledNearKeySets->resize(sampledInputSize);
+    const int keyCount = proximityInfo->getKeyCount();
+    SampledDistanceCache_G->resize(sampledInputSize * keyCount);
+    for (int pointIndex = lastSavedInputSize; pointIndex < sampledInputSize; ++pointIndex) {
+        NearKeycodesSet &nearKeys = (*SampledNearKeySets)[pointIndex];
+        nearKeys.reset();
+        const int rowStart = pointIndex * keyCount;
+        for (int keyId = 0; keyId < keyCount; ++keyId) {
+            const int pointX = (*sampledInputXs)[pointIndex];
+            const int pointY = (*sampledInputYs)[pointIndex];
+            const float distance = proximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(
+                    keyId, pointX, pointY);
+            (*SampledDistanceCache_G)[rowStart + keyId] = distance;
+            if (distance < ProximityInfoParams::NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) {
+                nearKeys[keyId] = true;
+            }
+        }
+    }
+}
+
+// Removes the newest entry from each of the five per-point vectors so that they stay in step.
+// Every vector must be non-empty.
+/* static */ void ProximityInfoStateUtils::popInputData(std::vector<int> *sampledInputXs,
+        std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+        std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice) {
+    std::vector<int> *const perPointVectors[] = { sampledInputXs, sampledInputYs,
+            sampledInputTimes, sampledLengthCache, sampledInputIndice };
+    const size_t vectorCount = sizeof(perPointVectors) / sizeof(perPointVectors[0]);
+    for (size_t v = 0; v < vectorCount; ++v) {
+        perPointVectors[v]->pop_back();
+    }
+}
+
+/**
+ * Recomputes the relative speed and the direction of the sampled points and returns the
+ * average speed of the whole stroke.
+ *
+ * The average speed is (last cached length - first cached length) divided by
+ * (last sampled time - first sampled time). For every sampled point i in
+ * [lastSavedInputSize, sampledInputSize) a local speed is measured on the raw input around
+ * the point and stored in sampledSpeedRates[i] as local speed / average speed; 1.0f is
+ * stored when either duration is zero.
+ *
+ * sampledDirections is resized to sampledInputSize - 1 and entry i receives the direction
+ * from sampled point i to sampled point i + 1, starting at max(0, lastSavedInputSize - 1).
+ *
+ * NOTE(review): the sampled vectors are assumed to be non-empty (back()/front() are called
+ * and sampledInputSize - 1 is used as a size) -- confirm callers guarantee this. Also, when
+ * sumDuration is 0 the returned average speed is the result of a float division by zero.
+ */ /* static */ float ProximityInfoStateUtils::refreshSpeedRates(const int inputSize,
+ const int *const xCoordinates, const int *const yCoordinates, const int *const times,
+ const int lastSavedInputSize, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs, const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledInputTimes,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<int> *const sampledInputIndice, std::vector<float> *sampledSpeedRates,
+ std::vector<float> *sampledDirections) {
+ // Relative speed calculation.
+ const int sumDuration = sampledInputTimes->back() - sampledInputTimes->front();
+ const int sumLength = sampledLengthCache->back() - sampledLengthCache->front();
+ const float averageSpeed = static_cast<float>(sumLength) / static_cast<float>(sumDuration);
+ sampledSpeedRates->resize(sampledInputSize);
+ for (int i = lastSavedInputSize; i < sampledInputSize; ++i) {
+ const int index = (*sampledInputIndice)[i]; // Raw input index of sampled point i.
+ int length = 0;
+ int duration = 0;
+
+ // Calculate velocity by using distances and durations of
+ // ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION points for both forward and
+ // backward.
+ const int forwardNumPoints = min(inputSize - 1,
+ index + ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION);
+ for (int j = index; j < forwardNumPoints; ++j) {
+ if (i < sampledInputSize - 1 && j >= (*sampledInputIndice)[i + 1]) {
+ break; // Do not run past the raw index of the next sampled point.
+ }
+ length += getDistanceInt(xCoordinates[j], yCoordinates[j],
+ xCoordinates[j + 1], yCoordinates[j + 1]);
+ duration += times[j + 1] - times[j];
+ }
+ const int backwardNumPoints = max(0,
+ index - ProximityInfoParams::NUM_POINTS_FOR_SPEED_CALCULATION);
+ for (int j = index - 1; j >= backwardNumPoints; --j) {
+ if (i > 0 && j < (*sampledInputIndice)[i - 1]) {
+ break; // Do not run before the raw index of the previous sampled point.
+ }
+ // TODO: use mSampledLengthCache instead?
+ length += getDistanceInt(xCoordinates[j], yCoordinates[j],
+ xCoordinates[j + 1], yCoordinates[j + 1]);
+ duration += times[j + 1] - times[j];
+ }
+ if (duration == 0 || sumDuration == 0) {
+ // Cannot calculate speed; thus, it gives an average value (1.0);
+ (*sampledSpeedRates)[i] = 1.0f;
+ } else {
+ const float speed = static_cast<float>(length) / static_cast<float>(duration);
+ (*sampledSpeedRates)[i] = speed / averageSpeed;
+ }
+ }
+
+ // Direction calculation.
+ sampledDirections->resize(sampledInputSize - 1);
+ for (int i = max(0, lastSavedInputSize - 1); i < sampledInputSize - 1; ++i) {
+ (*sampledDirections)[i] = getDirection(sampledInputXs, sampledInputYs, i, i + 1);
+ }
+ return averageSpeed;
+}
+
+// Recomputes the beeline speed percentile of every sampled point: the value returned by
+// calculateBeelineSpeedRate() multiplied by MAX_PERCENTILE and truncated to int.
+/* static */ void ProximityInfoStateUtils::refreshBeelineSpeedRates(const int mostCommonKeyWidth,
+        const float averageSpeed, const int inputSize, const int *const xCoordinates,
+        const int *const yCoordinates, const int *times, const int sampledInputSize,
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const std::vector<int> *const inputIndice,
+        std::vector<int> *beelineSpeedPercentiles) {
+    if (DEBUG_SAMPLING_POINTS) {
+        AKLOGI("--- refresh beeline speed rates");
+    }
+    beelineSpeedPercentiles->resize(sampledInputSize);
+    for (int sampledIndex = 0; sampledIndex < sampledInputSize; ++sampledIndex) {
+        const float beelineSpeedRate = calculateBeelineSpeedRate(mostCommonKeyWidth,
+                averageSpeed, sampledIndex, inputSize, xCoordinates, yCoordinates, times,
+                sampledInputSize, sampledInputXs, sampledInputYs, inputIndice);
+        (*beelineSpeedPercentiles)[sampledIndex] =
+                static_cast<int>(beelineSpeedRate * MAX_PERCENTILE);
+    }
+}
+
+// Returns the angle, as computed by getAngle(), of the segment from sampled point index0 to
+// sampled point index1. Returns 0.0f when either index is outside the sampled points.
+/* static */ float ProximityInfoStateUtils::getDirection(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int index0, const int index1) {
+    ASSERT(sampledInputXs && sampledInputYs);
+    const int pointCount = sampledInputXs->size();
+    const bool index0InRange = index0 >= 0 && index0 < pointCount;
+    const bool index1InRange = index1 >= 0 && index1 < pointCount;
+    if (!index0InRange || !index1InRange) {
+        return 0.0f;
+    }
+    return getAngle((*sampledInputXs)[index0], (*sampledInputYs)[index0],
+            (*sampledInputXs)[index1], (*sampledInputYs)[index1]);
+}
+
+// Rebuilds currentNearKeysDistances for the point (x, y): the map is cleared and then receives
+// the distance of every key that is closer than NEAR_KEY_THRESHOLD_FOR_DISTANCE.
+// Returns the smallest distance found over all keys, capped at maxPointToKeyLength.
+/* static */ float ProximityInfoStateUtils::updateNearKeysDistances(
+        const ProximityInfo *const proximityInfo, const float maxPointToKeyLength, const int x,
+        const int y, NearKeysDistanceMap *const currentNearKeysDistances) {
+    currentNearKeysDistances->clear();
+    float nearestKeyDistance = maxPointToKeyLength;
+    const int keyCount = proximityInfo->getKeyCount();
+    for (int keyId = 0; keyId < keyCount; ++keyId) {
+        const float keyDistance =
+                proximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(keyId, x, y);
+        const bool isNearKey = keyDistance < ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_DISTANCE;
+        if (isNearKey) {
+            currentNearKeysDistances->insert(std::pair<int, float>(keyId, keyDistance));
+        }
+        nearestKeyDistance = (nearestKeyDistance > keyDistance) ? keyDistance : nearestKeyDistance;
+    }
+    return nearestKeyDistance;
+}
+
+// Checks whether the previous point is a local minimum of the distance to some near key.
+// This holds when, for at least one key near the previous point, both the point before it and
+// the current point are either not near that key at all or farther from it by more than
+// MARGIN_FOR_PREV_LOCAL_MIN.
+/* static */ bool ProximityInfoStateUtils::isPrevLocalMin(
+        const NearKeysDistanceMap *const currentNearKeysDistances,
+        const NearKeysDistanceMap *const prevNearKeysDistances,
+        const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
+    NearKeysDistanceMap::const_iterator prevIt = prevNearKeysDistances->begin();
+    for (; prevIt != prevNearKeysDistances->end(); ++prevIt) {
+        const NearKeysDistanceMap::const_iterator beforeIt =
+                prevPrevNearKeysDistances->find(prevIt->first);
+        const bool fartherBefore = beforeIt == prevPrevNearKeysDistances->end()
+                || beforeIt->second
+                        > prevIt->second + ProximityInfoParams::MARGIN_FOR_PREV_LOCAL_MIN;
+        if (!fartherBefore) {
+            continue;
+        }
+        const NearKeysDistanceMap::const_iterator afterIt =
+                currentNearKeysDistances->find(prevIt->first);
+        const bool fartherAfter = afterIt == currentNearKeysDistances->end()
+                || afterIt->second
+                        > prevIt->second + ProximityInfoParams::MARGIN_FOR_PREV_LOCAL_MIN;
+        if (fartherAfter) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Calculates a score for the candidate point (x, y) from the two most recently sampled points.
+// The score starts at 0 and accumulates:
+//  - NOT_LOCALMIN_DISTANCE_SCORE when the previous point is not a local minimum; otherwise
+//    LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE when "nearest" is below
+//    NEAR_KEY_THRESHOLD_FOR_POINT_SCORE;
+//  - CORNER_SCORE when the last two sampled points are far enough apart and either sumAngle or
+//    the change of direction at the last sampled point exceeds its threshold.
+// Returns 0 without scoring when fewer than two points are sampled or when the previous point's
+// distance map is empty. The "time" and "lastPoint" parameters are not used by the computation.
+/* static */ float ProximityInfoStateUtils::getPointScore(const int mostCommonKeyWidth,
+        const int x, const int y, const int time, const bool lastPoint, const float nearest,
+        const float sumAngle, const NearKeysDistanceMap *const currentNearKeysDistances,
+        const NearKeysDistanceMap *const prevNearKeysDistances,
+        const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+        std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs) {
+    const size_t sampledCount = sampledInputXs->size();
+    // With at most one sampled point, or no distance data for the previous point, there is
+    // nothing to compare against.
+    if (sampledCount <= 1 || prevNearKeysDistances->empty()) {
+        return 0.0f;
+    }
+
+    const int lastX = sampledInputXs->back();
+    const int lastY = sampledInputYs->back();
+    const int secondLastX = (*sampledInputXs)[sampledCount - 2];
+    const int secondLastY = (*sampledInputYs)[sampledCount - 2];
+    const int distPrev = getDistanceInt(lastX, lastY, secondLastX, secondLastY)
+            * ProximityInfoParams::DISTANCE_BASE_SCALE;
+    float score = 0.0f;
+
+    // Location
+    const bool prevIsLocalMin = isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances,
+            prevPrevNearKeysDistances);
+    if (!prevIsLocalMin) {
+        score += ProximityInfoParams::NOT_LOCALMIN_DISTANCE_SCORE;
+    } else if (nearest < ProximityInfoParams::NEAR_KEY_THRESHOLD_FOR_POINT_SCORE) {
+        // Promote points nearby keys
+        score += ProximityInfoParams::LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE;
+    }
+
+    // Angle
+    const float angleToCandidate = getAngle(x, y, lastX, lastY);
+    const float angleOfLastSegment = getAngle(lastX, lastY, secondLastX, secondLastY);
+    const float angleDiff = getAngleDiff(angleToCandidate, angleOfLastSegment);
+
+    // Save corner
+    const bool farEnough = distPrev
+            > mostCommonKeyWidth * ProximityInfoParams::CORNER_CHECK_DISTANCE_THRESHOLD_SCALE;
+    const bool sharpEnough = sumAngle > ProximityInfoParams::CORNER_SUM_ANGLE_THRESHOLD
+            || angleDiff > ProximityInfoParams::CORNER_ANGLE_THRESHOLD_FOR_POINT_SCORE;
+    if (farEnough && sharpEnough) {
+        score += ProximityInfoParams::CORNER_SCORE;
+    }
+    return score;
+}
+
+// Sampling touch point and pushing information to vectors.
+// Returning if previous point is popped or not.
+/**
+ * Steps, in order:
+ *  1. Only when nodeCodePoint is negative and doSampling is set: the near-key distances of
+ *     (x, y) are stored in currentNearKeysDistances and the point is scored. A negative score
+ *     pops the previously sampled point. If this is the last point and it lies too close to
+ *     the newest sampled point (distance * LAST_POINT_SKIP_DISTANCE_SCALE below the most
+ *     common key width), the function returns without pushing anything.
+ *  2. When nodeCodePoint is non-negative and x or y is negative, the coordinates are replaced
+ *     by the center of the key for that code point, if the key exists.
+ *  3. The point is appended: cumulative length (0 for the first point), x, y, time and
+ *     inputIndex.
+ */ /* static */ bool ProximityInfoStateUtils::pushTouchPoint(const ProximityInfo *const proximityInfo,
+ const int maxPointToKeyLength, const int inputIndex, const int nodeCodePoint, int x, int y,
+ const int time, const bool doSampling, const bool isLastPoint, const float sumAngle,
+ NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice) {
+ const int mostCommonKeyWidth = proximityInfo->getMostCommonKeyWidth();
+
+ size_t size = sampledInputXs->size();
+ bool popped = false;
+ if (nodeCodePoint < 0 && doSampling) {
+ const float nearest = updateNearKeysDistances(
+ proximityInfo, maxPointToKeyLength, x, y, currentNearKeysDistances);
+ const float score = getPointScore(mostCommonKeyWidth, x, y, time, isLastPoint, nearest,
+ sumAngle, currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances, sampledInputXs, sampledInputYs);
+ if (score < 0) {
+ // Pop previous point because it would be useless.
+ popInputData(sampledInputXs, sampledInputYs, sampledInputTimes, sampledLengthCache,
+ sampledInputIndice);
+ size = sampledInputXs->size(); // Keep the local count in step with the vectors.
+ popped = true;
+ } else {
+ popped = false;
+ }
+ // Check if the last point should be skipped.
+ if (isLastPoint && size > 0) {
+ if (getDistanceInt(x, y, sampledInputXs->back(), sampledInputYs->back())
+ * ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE < mostCommonKeyWidth) {
+ // This point is not used because it's too close to the previous point.
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %d, "
+ "width = %d", size, x, y, sampledInputXs->back(),
+ sampledInputYs->back(), getDistanceInt(
+ x, y, sampledInputXs->back(), sampledInputYs->back()),
+ mostCommonKeyWidth
+ / ProximityInfoParams::LAST_POINT_SKIP_DISTANCE_SCALE);
+ }
+ return popped; // Nothing is pushed; a pop made above still stands.
+ }
+ }
+ }
+
+ if (nodeCodePoint >= 0 && (x < 0 || y < 0)) {
+ const int keyId = proximityInfo->getKeyIndexOf(nodeCodePoint);
+ if (keyId >= 0) { // Otherwise the negative coordinates are pushed unchanged.
+ x = proximityInfo->getKeyCenterXOfKeyIdG(keyId);
+ y = proximityInfo->getKeyCenterYOfKeyIdG(keyId);
+ }
+ }
+
+ // Pushing point information.
+ if (size > 0) {
+ sampledLengthCache->push_back(
+ sampledLengthCache->back() + getDistanceInt(
+ x, y, sampledInputXs->back(), sampledInputYs->back()));
+ } else {
+ sampledLengthCache->push_back(0);
+ }
+ sampledInputXs->push_back(x);
+ sampledInputYs->push_back(y);
+ sampledInputTimes->push_back(time);
+ sampledInputIndice->push_back(inputIndex);
+ if (DEBUG_GEO_FULL) {
+ AKLOGI("pushTouchPoint: x = %03d, y = %03d, time = %d, index = %d, popped ? %01d",
+ x, y, time, inputIndex, popped);
+ }
+ return popped;
+}
+
+/**
+ * Returns the straight-line ("beeline") speed around sampled point id, relative to
+ * averageSpeed.
+ *
+ * Starting from the raw input index of the sampled point, the raw input is walked towards
+ * earlier and towards later points until the straight-line distance from the sampled point
+ * reaches the lookup radius (mostCommonKeyWidth * LOOKUP_RADIUS_PERCENTILE / MAX_PERCENTILE)
+ * or the input ends. The result is 0.01f + (distance between the two window ends / elapsed
+ * time) / averageSpeed.
+ *
+ * Special results:
+ *  - 1.0f when sampledInputSize <= 0, averageSpeed < 0.001f, the window is empty
+ *    (start >= end) or the adjusted elapsed time is not positive;
+ *  - 0.0f when the adjusted elapsed time is at least STRONG_DOUBLE_LETTER_TIME_MILLIS.
+ *
+ * NOTE(review): id is used to index the sampled vectors without a range check -- presumably
+ * callers pass 0 <= id < sampledInputSize; confirm.
+ */ /* static */ float ProximityInfoStateUtils::calculateBeelineSpeedRate(const int mostCommonKeyWidth,
+ const float averageSpeed, const int id, const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledInputIndices) {
+ if (sampledInputSize <= 0 || averageSpeed < 0.001f) {
+ if (DEBUG_SAMPLING_POINTS) {
+ AKLOGI("--- invalid state: cancel. size = %d, ave = %f",
+ sampledInputSize, averageSpeed);
+ }
+ return 1.0f;
+ }
+ const int lookupRadius = mostCommonKeyWidth
+ * ProximityInfoParams::LOOKUP_RADIUS_PERCENTILE / MAX_PERCENTILE;
+ const int x0 = (*sampledInputXs)[id];
+ const int y0 = (*sampledInputYs)[id];
+ const int actualInputIndex = (*sampledInputIndices)[id];
+ int tempTime = 0; // Accumulated in both loops below but never read afterwards.
+ int tempBeelineDistance = 0;
+ int start = actualInputIndex;
+ // Look backward: move "start" towards earlier raw inputs until the lookup radius is reached.
+ while (start > 0 && tempBeelineDistance < lookupRadius) {
+ tempTime += times[start] - times[start - 1];
+ --start;
+ tempBeelineDistance = getDistanceInt(x0, y0, xCoordinates[start], yCoordinates[start]);
+ }
+ // Exclusive unless this is an edge point
+ if (start > 0 && start < actualInputIndex) {
+ ++start;
+ }
+ tempTime= 0;
+ tempBeelineDistance = 0;
+ int end = actualInputIndex;
+ // Look forward: move "end" towards later raw inputs until the lookup radius is reached.
+ while (end < (inputSize - 1) && tempBeelineDistance < lookupRadius) {
+ tempTime += times[end + 1] - times[end];
+ ++end;
+ tempBeelineDistance = getDistanceInt(x0, y0, xCoordinates[end], yCoordinates[end]);
+ }
+ // Exclusive unless this is an edge point
+ if (end > actualInputIndex && end < (inputSize - 1)) {
+ --end;
+ }
+
+ if (start >= end) {
+ if (DEBUG_DOUBLE_LETTER) {
+ AKLOGI("--- double letter: start == end %d", start);
+ }
+ return 1.0f;
+ }
+
+ const int x2 = xCoordinates[start];
+ const int y2 = yCoordinates[start];
+ const int x3 = xCoordinates[end];
+ const int y3 = yCoordinates[end];
+ const int beelineDistance = getDistanceInt(x2, y2, x3, y3);
+ int adjustedStartTime = times[start];
+ if (start == 0 && actualInputIndex == 0 && inputSize > 1) {
+ adjustedStartTime += ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS;
+ }
+ int adjustedEndTime = times[end];
+ if (end == (inputSize - 1) && inputSize > 1) { // The same offset constant is used for the end.
+ adjustedEndTime -= ProximityInfoParams::FIRST_POINT_TIME_OFFSET_MILLIS;
+ }
+ const int time = adjustedEndTime - adjustedStartTime;
+ if (time <= 0) {
+ return 1.0f;
+ }
+
+ if (time >= ProximityInfoParams::STRONG_DOUBLE_LETTER_TIME_MILLIS){
+ return 0.0f;
+ }
+ if (DEBUG_DOUBLE_LETTER) {
+ AKLOGI("--- (%d, %d) double letter: start = %d, end = %d, dist = %d, time = %d,"
+ " speed = %f, ave = %f, val = %f, start time = %d, end time = %d",
+ id, (*sampledInputIndices)[id], start, end, beelineDistance, time,
+ (static_cast<float>(beelineDistance) / static_cast<float>(time)), averageSpeed,
+ ((static_cast<float>(beelineDistance) / static_cast<float>(time))
+ / averageSpeed), adjustedStartTime, adjustedEndTime);
+ }
+ // Offset 1%
+ // TODO: Detect double letter more smartly
+ return 0.01f + static_cast<float>(beelineDistance) / static_cast<float>(time) / averageSpeed;
+}
+
+// Returns the change of direction at the sampled point "index": the angle difference between the
+// segment arriving from the previous sampled point and the segment leaving to the next one.
+// Returns 0.0f when a vector pointer is null or when the point is the first or last sampled
+// point (or outside the sampled points), since one of the two segments is then missing.
+/* static */ float ProximityInfoStateUtils::getPointAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int index) {
+    if (!sampledInputXs || !sampledInputYs) {
+        return 0.0f;
+    }
+    const int lastIndex = static_cast<int>(sampledInputXs->size()) - 1;
+    if (index <= 0 || index >= lastIndex) {
+        return 0.0f;
+    }
+    const float incoming = getDirection(sampledInputXs, sampledInputYs, index - 1, index);
+    const float outgoing = getDirection(sampledInputXs, sampledInputYs, index, index + 1);
+    return getAngleDiff(incoming, outgoing);
+}
+
+// Returns the angle difference between the segment index0 -> index1 and the segment
+// index1 -> index2 of the sampled points. Returns 0.0f when a vector pointer is null or when any
+// of the three indices is outside the sampled points.
+/* static */ float ProximityInfoStateUtils::getPointsAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const int index0, const int index1, const int index2) {
+    if (!sampledInputXs || !sampledInputYs) {
+        return 0.0f;
+    }
+    const int pointCount = sampledInputXs->size();
+    const int indices[] = { index0, index1, index2 };
+    for (size_t n = 0; n < sizeof(indices) / sizeof(indices[0]); ++n) {
+        if (indices[n] < 0 || indices[n] >= pointCount) {
+            return 0.0f;
+        }
+    }
+    const float firstDirection = getDirection(sampledInputXs, sampledInputYs, index0, index1);
+    const float secondDirection = getDirection(sampledInputXs, sampledInputYs, index1, index2);
+    return getAngleDiff(firstDirection, secondDirection);
+}
+
+// Looks up the cached distance between the sampled point inputIndex and the key keyId, capped at
+// maxPointToKeyLength. The cache is indexed as [inputIndex * keyCount + keyId].
+// This function basically converts from a length to an edit distance. Accordingly, it's obviously
+// wrong to compare with mMaxPointToKeyLength.
+/* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength,
+        const std::vector<float> *const SampledDistanceCache_G, const int keyCount,
+        const int inputIndex, const int keyId) {
+    if (keyId == NOT_AN_INDEX) {
+        // If the char is not a key on the keyboard then return the max length.
+        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+    }
+    const int cacheIndex = inputIndex * keyCount + keyId;
+    return min((*SampledDistanceCache_G)[cacheIndex], maxPointToKeyLength);
+}
+
+// Updates probabilities of aligning to some keys and skipping.
+// Word suggestion should be based on this probabilities.
+/* static */ void ProximityInfoStateUtils::updateAlignPointProbabilities(
+ const float maxPointToKeyLength, const int mostCommonKeyWidth, const int keyCount,
+ const int start, const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<float> *const sampledSpeedRates,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<float> *const SampledDistanceCache_G,
+ std::vector<NearKeycodesSet> *SampledNearKeySets,
+ std::vector<hash_map_compat<int, float> > *charProbabilities) {
+ charProbabilities->resize(sampledInputSize);
+ // Calculates probabilities of using a point as a correlated point with the character
+ // for each point.
+ for (int i = start; i < sampledInputSize; ++i) {
+ (*charProbabilities)[i].clear();
+ // First, calculates skip probability. Starts from MAX_SKIP_PROBABILITY.
+ // Note that all values that are multiplied to this probability should be in [0.0, 1.0];
+ float skipProbability = ProximityInfoParams::MAX_SKIP_PROBABILITY;
+
+ const float currentAngle = getPointAngle(sampledInputXs, sampledInputYs, i);
+ const float speedRate = (*sampledSpeedRates)[i];
+
+ float nearestKeyDistance = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+ for (int j = 0; j < keyCount; ++j) {
+ if ((*SampledNearKeySets)[i].test(j)) {
+ const float distance = getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i, j);
+ if (distance < nearestKeyDistance) {
+ nearestKeyDistance = distance;
+ }
+ }
+ }
+
+ if (i == 0) {
+ skipProbability *= min(1.0f,
+ nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT
+ + ProximityInfoParams::NEAREST_DISTANCE_BIAS);
+ // Promote the first point
+ skipProbability *= ProximityInfoParams::SKIP_FIRST_POINT_PROBABILITY;
+ } else if (i == sampledInputSize - 1) {
+ skipProbability *= min(1.0f,
+ nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT_FOR_LAST
+ + ProximityInfoParams::NEAREST_DISTANCE_BIAS_FOR_LAST);
+ // Promote the last point
+ skipProbability *= ProximityInfoParams::SKIP_LAST_POINT_PROBABILITY;
+ } else {
+ // If the current speed is relatively slower than adjacent keys, we promote this point.
+ if ((*sampledSpeedRates)[i - 1] - ProximityInfoParams::SPEED_MARGIN > speedRate
+ && speedRate
+ < (*sampledSpeedRates)[i + 1] - ProximityInfoParams::SPEED_MARGIN) {
+ if (currentAngle < ProximityInfoParams::CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= min(1.0f, speedRate
+ * ProximityInfoParams::SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY);
+ } else {
+ // If the angle is small enough, we promote this point more. (e.g. pit vs put)
+ skipProbability *= min(1.0f,
+ speedRate * ProximityInfoParams::SPEED_WEIGHT_FOR_SKIP_PROBABILITY
+ + ProximityInfoParams::MIN_SPEED_RATE_FOR_SKIP_PROBABILITY);
+ }
+ }
+
+ skipProbability *= min(1.0f,
+ speedRate * nearestKeyDistance * ProximityInfoParams::NEAREST_DISTANCE_WEIGHT
+ + ProximityInfoParams::NEAREST_DISTANCE_BIAS);
+
+ // Adjusts skip probability by a rate depending on angle.
+ // ANGLE_RATE of skipProbability is adjusted by current angle.
+ skipProbability *= (M_PI_F - currentAngle) / M_PI_F * ProximityInfoParams::ANGLE_WEIGHT
+ + (1.0f - ProximityInfoParams::ANGLE_WEIGHT);
+ if (currentAngle > ProximityInfoParams::DEEP_CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= ProximityInfoParams::SKIP_DEEP_CORNER_PROBABILITY;
+ }
+ // We assume the angle of this point is the angle for point[i], point[i - 2]
+ // and point[i - 3]. The reason why we don't use the angle for point[i], point[i - 1]
+ // and point[i - 2] is this angle can be more affected by the noise.
+ const float prevAngle = getPointsAngle(sampledInputXs, sampledInputYs, i, i - 2, i - 3);
+ if (i >= 3 && prevAngle < ProximityInfoParams::STRAIGHT_ANGLE_THRESHOLD
+ && currentAngle > ProximityInfoParams::CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= ProximityInfoParams::SKIP_CORNER_PROBABILITY;
+ }
+ }
+
+ // probabilities must be in [0.0, ProximityInfoParams::MAX_SKIP_PROBABILITY];
+ ASSERT(skipProbability >= 0.0f);
+ ASSERT(skipProbability <= ProximityInfoParams::MAX_SKIP_PROBABILITY);
+ (*charProbabilities)[i][NOT_AN_INDEX] = skipProbability;
+
+ // Second, calculates key probabilities by dividing the rest probability
+ // (1.0f - skipProbability).
+ const float inputCharProbability = 1.0f - skipProbability;
+
+ const float speedxAngleRate = min(speedRate * currentAngle / M_PI_F
+ * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION,
+ ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION);
+ const float speedxNearestKeyDistanceRate = min(speedRate * nearestKeyDistance
+ * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION,
+ ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION);
+ const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate
+ + ProximityInfoParams::MIN_STANDERD_DIVIATION;
+
+ ProximityInfoUtils::NormalDistribution
+ distribution(ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma);
+ // Summing up probability densities of all near keys.
+ float sumOfProbabilityDensities = 0.0f;
+ for (int j = 0; j < keyCount; ++j) {
+ if ((*SampledNearKeySets)[i].test(j)) {
+ float distance = sqrtf(getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i, j));
+ if (i == 0 && i != sampledInputSize - 1) {
+ // For the first point, weighted average of distances from first point and the
+ // next point to the key is used as a point to key distance.
+ const float nextDistance = sqrtf(getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i + 1, j));
+ if (nextDistance < distance) {
+ // The distance of the first point tends to bigger than continuing
+ // points because the first touch by the user can be sloppy.
+ // So we promote the first point if the distance of that point is larger
+ // than the distance of the next point.
+ distance = (distance
+ + nextDistance * ProximityInfoParams::NEXT_DISTANCE_WEIGHT)
+ / (1.0f + ProximityInfoParams::NEXT_DISTANCE_WEIGHT);
+ }
+ } else if (i != 0 && i == sampledInputSize - 1) {
+ // For the last point, weighted average of distances from the last point and
+ // the previous point to the key is used as a point to key distance.
+ const float previousDistance = sqrtf(getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i - 1, j));
+ if (previousDistance < distance) {
+ // The distance of the last point tends to bigger than continuing points
+ // because the last touch by the user can be sloppy. So we promote the
+ // last point if the distance of that point is larger than the distance of
+ // the previous point.
+ distance = (distance
+ + previousDistance * ProximityInfoParams::PREV_DISTANCE_WEIGHT)
+ / (1.0f + ProximityInfoParams::PREV_DISTANCE_WEIGHT);
+ }
+ }
+ // TODO: Promote the first point when the extended line from the next input is near
+ // from a key. Also, promote the last point as well.
+ sumOfProbabilityDensities += distribution.getProbabilityDensity(distance);
+ }
+ }
+
+ // Split the probability of an input point to keys that are close to the input point.
+ for (int j = 0; j < keyCount; ++j) {
+ if ((*SampledNearKeySets)[i].test(j)) {
+ float distance = sqrtf(getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i, j));
+ if (i == 0 && i != sampledInputSize - 1) {
+ // For the first point, weighted average of distances from the first point and
+ // the next point to the key is used as a point to key distance.
+ const float prevDistance = sqrtf(getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i + 1, j));
+ if (prevDistance < distance) {
+ distance = (distance
+ + prevDistance * ProximityInfoParams::NEXT_DISTANCE_WEIGHT)
+ / (1.0f + ProximityInfoParams::NEXT_DISTANCE_WEIGHT);
+ }
+ } else if (i != 0 && i == sampledInputSize - 1) {
+ // For the last point, weighted average of distances from the last point and
+ // the previous point to the key is used as a point to key distance.
+ const float prevDistance = sqrtf(getPointToKeyByIdLength(
+ maxPointToKeyLength, SampledDistanceCache_G, keyCount, i - 1, j));
+ if (prevDistance < distance) {
+ distance = (distance
+ + prevDistance * ProximityInfoParams::PREV_DISTANCE_WEIGHT)
+ / (1.0f + ProximityInfoParams::PREV_DISTANCE_WEIGHT);
+ }
+ }
+ const float probabilityDensity = distribution.getProbabilityDensity(distance);
+ const float probability = inputCharProbability * probabilityDensity
+ / sumOfProbabilityDensities;
+ (*charProbabilities)[i][j] = probability;
+ }
+ }
+ }
+
+ if (DEBUG_POINTS_PROBABILITY) {
+ for (int i = 0; i < sampledInputSize; ++i) {
+ std::stringstream sstream;
+ sstream << i << ", ";
+ sstream << "(" << (*sampledInputXs)[i] << ", " << (*sampledInputYs)[i] << "), ";
+ sstream << "Speed: "<< (*sampledSpeedRates)[i] << ", ";
+ sstream << "Angle: "<< getPointAngle(sampledInputXs, sampledInputYs, i) << ", \n";
+
+ for (hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].begin();
+ it != (*charProbabilities)[i].end(); ++it) {
+ if (it->first == NOT_AN_INDEX) {
+ sstream << it->first
+ << "(skip):"
+ << it->second
+ << "\n";
+ } else {
+ sstream << it->first
+ << "("
+ //<< static_cast<char>(mProximityInfo->getCodePointOf(it->first))
+ << "):"
+ << it->second
+ << "\n";
+ }
+ }
+ AKLOGI("%s", sstream.str().c_str());
+ }
+ }
+
+ // Decrease key probabilities of points which don't have the highest probability of that key
+ // among nearby points. Probabilities of the first point and the last point are not suppressed.
+ for (int i = max(start, 1); i < sampledInputSize; ++i) {
+ for (int j = i + 1; j < sampledInputSize; ++j) {
+ if (!suppressCharProbabilities(
+ mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+ charProbabilities)) {
+ break;
+ }
+ }
+ for (int j = i - 1; j >= max(start, 0); --j) {
+ if (!suppressCharProbabilities(
+ mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+ charProbabilities)) {
+ break;
+ }
+ }
+ }
+
+ // Converting from raw probabilities to log probabilities to calculate spatial distance.
+ for (int i = start; i < sampledInputSize; ++i) {
+ for (int j = 0; j < keyCount; ++j) {
+ hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].find(j);
+ if (it == (*charProbabilities)[i].end()){
+ (*SampledNearKeySets)[i].reset(j);
+ } else if(it->second < ProximityInfoParams::MIN_PROBABILITY) {
+ // Erases from near keys vector because it has very low probability.
+ (*SampledNearKeySets)[i].reset(j);
+ (*charProbabilities)[i].erase(j);
+ } else {
+ it->second = -logf(it->second);
+ }
+ }
+ (*charProbabilities)[i][NOT_AN_INDEX] = -logf((*charProbabilities)[i][NOT_AN_INDEX]);
+ }
+}
+
+ // Rebuilds, for every sampled point, the set of keys worth searching from that point and the
+ // matching list of distinct code points.
+ //
+ // The search set of point i is the union of the near-key sets of the points j >= i that lie
+ // less than a fixed stroke length ahead of i. Only points at or after lastSavedInputSize
+ // contribute; sets of points at or after lastSavedInputSize are cleared first, earlier sets
+ // are only extended. The code point vectors are always rebuilt from scratch.
+ /* static */ void ProximityInfoStateUtils::updateSampledSearchKeySets(
+         const ProximityInfo *const proximityInfo, const int sampledInputSize,
+         const int lastSavedInputSize,
+         const std::vector<int> *const sampledLengthCache,
+         const std::vector<NearKeycodesSet> *const SampledNearKeySets,
+         std::vector<NearKeycodesSet> *sampledSearchKeySets,
+         std::vector<std::vector<int> > *sampledSearchKeyVectors) {
+     sampledSearchKeySets->resize(sampledInputSize);
+     sampledSearchKeyVectors->resize(sampledInputSize);
+
+     // How far ahead along the stroke a point may borrow near keys from, derived from the
+     // keyboard diagonal.
+     const int lookAheadLength = static_cast<int>(
+             hypotf(proximityInfo->getKeyboardWidth(), proximityInfo->getKeyboardHeight())
+                     * ProximityInfoParams::SEARCH_KEY_RADIUS_RATIO);
+
+     for (int pointIndex = 0; pointIndex < sampledInputSize; ++pointIndex) {
+         NearKeycodesSet &searchKeySet = (*sampledSearchKeySets)[pointIndex];
+         if (pointIndex >= lastSavedInputSize) {
+             searchKeySet.reset();
+         }
+         int aheadIndex = max(pointIndex, lastSavedInputSize);
+         while (aheadIndex < sampledInputSize) {
+             // TODO: Investigate if this is required. This may not fail.
+             const int strokeGap =
+                     (*sampledLengthCache)[aheadIndex] - (*sampledLengthCache)[pointIndex];
+             if (strokeGap >= lookAheadLength) {
+                 break;
+             }
+             searchKeySet |= (*SampledNearKeySets)[aheadIndex];
+             ++aheadIndex;
+         }
+     }
+
+     const int keyCount = proximityInfo->getKeyCount();
+     for (int pointIndex = 0; pointIndex < sampledInputSize; ++pointIndex) {
+         std::vector<int> &codePoints = (*sampledSearchKeyVectors)[pointIndex];
+         codePoints.clear();
+         for (int keyIndex = 0; keyIndex < keyCount; ++keyIndex) {
+             if (!(*sampledSearchKeySets)[pointIndex].test(keyIndex)) {
+                 continue;
+             }
+             // Several keys may map to the same code point; list each code point only once.
+             const int keyCodePoint = proximityInfo->getCodePointOf(keyIndex);
+             const bool alreadyListed = std::find(codePoints.begin(), codePoints.end(),
+                     keyCodePoint) != codePoints.end();
+             if (!alreadyListed) {
+                 codePoints.push_back(keyCodePoint);
+             }
+         }
+     }
+ }
+
+ // Moves probability mass between two sampled points that are close along the stroke.
+ //
+ // For every key whose probability at index0 is lower than at index1, the probability at index0
+ // is scaled down and the removed amount is added to index0's skip probability (stored under
+ // NOT_AN_INDEX). Part of that amount, capped by a share of index1's skip probability, is then
+ // moved from index1's skip probability to the same key at index1.
+ //
+ // Returns false, without touching anything, when the stroke length between the two points
+ // exceeds the suppression range; returns true otherwise.
+ /* static */ bool ProximityInfoStateUtils::suppressCharProbabilities(const int mostCommonKeyWidth,
+         const int sampledInputSize, const std::vector<int> *const lengthCache,
+         const int index0, const int index1,
+         std::vector<hash_map_compat<int, float> > *charProbabilities) {
+     ASSERT(0 <= index0 && index0 < sampledInputSize);
+     ASSERT(0 <= index1 && index1 < sampledInputSize);
+
+     const float keyWidth = static_cast<float>(mostCommonKeyWidth);
+     const float strokeGap =
+             fabsf(static_cast<float>((*lengthCache)[index0] - (*lengthCache)[index1]));
+     const bool outOfRange =
+             strokeGap > keyWidth * ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT;
+     if (outOfRange) {
+         return false;
+     }
+
+     // Grows with the gap between the two points, starting from MIN_SUPPRESSION_RATE.
+     const float suppressionRate = ProximityInfoParams::MIN_SUPPRESSION_RATE
+             + strokeGap / keyWidth / ProximityInfoParams::SUPPRESSION_LENGTH_WEIGHT
+                     * ProximityInfoParams::SUPPRESSION_WEIGHT;
+
+     typedef hash_map_compat<int, float>::iterator ProbabilityIterator;
+     for (ProbabilityIterator own = (*charProbabilities)[index0].begin();
+             own != (*charProbabilities)[index0].end(); ++own) {
+         ProbabilityIterator other = (*charProbabilities)[index1].find(own->first);
+         if (other == (*charProbabilities)[index1].end()) {
+             continue;
+         }
+         if (!(own->second < other->second)) {
+             continue;
+         }
+         const float reducedProbability = own->second * suppressionRate;
+         const float removed = own->second - reducedProbability;
+         own->second = reducedProbability;
+         // The NOT_AN_INDEX entry of index0 is the probability of skipping this point.
+         (*charProbabilities)[index0][NOT_AN_INDEX] += removed;
+
+         // Hand part of the removed probability to the same key at index1.
+         const float probabilityGain = min(removed
+                 * ProximityInfoParams::SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN,
+                 (*charProbabilities)[index1][NOT_AN_INDEX]
+                         * ProximityInfoParams::SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN);
+         other->second += probabilityGain;
+         (*charProbabilities)[index1][NOT_AN_INDEX] -= probabilityGain;
+     }
+     return true;
+ }
+
+ // Tells whether the raw input still agrees with the points sampled so far.
+ //
+ // Returns true only if the raw input has at least sampledInputSize points and, for each sampled
+ // point, the raw point at its recorded index exists (index < inputSize) and has the same x, y
+ // and - when a times array is supplied - the same time. A null times array skips the time
+ // comparison.
+ /* static */ bool ProximityInfoStateUtils::checkAndReturnIsContinuationPossible(const int inputSize,
+         const int *const xCoordinates, const int *const yCoordinates, const int *const times,
+         const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+         const std::vector<int> *const sampledInputYs,
+         const std::vector<int> *const sampledTimes,
+         const std::vector<int> *const sampledInputIndices) {
+     if (sampledInputSize > inputSize) {
+         return false;
+     }
+     for (int sampledIndex = 0; sampledIndex < sampledInputSize; ++sampledIndex) {
+         const int rawIndex = (*sampledInputIndices)[sampledIndex];
+         if (rawIndex >= inputSize) {
+             return false;
+         }
+         const bool samePosition = xCoordinates[rawIndex] == (*sampledInputXs)[sampledIndex]
+                 && yCoordinates[rawIndex] == (*sampledInputYs)[sampledIndex];
+         if (!samePosition) {
+             return false;
+         }
+         if (times && times[rawIndex] != (*sampledTimes)[sampledIndex]) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Writes the most probable string for the sampled points into codePointBuf and returns the sum
+ // of the log probabilities chosen along the way.
+ //
+ // For each point the entry with the smallest value in charProbabilities is picked; key entries
+ // are penalised by DEMOTION_LOG_PROBABILITY relative to the skip entry (NOT_AN_INDEX). A point
+ // whose best entry is the skip entry emits no character. codePointBuf is zeroed over
+ // MAX_WORD_LENGTH elements first and at most MAX_WORD_LENGTH - 1 code points are written,
+ // followed by a terminating 0.
+ /* static */ float ProximityInfoStateUtils::getMostProbableString(
+         const ProximityInfo *const proximityInfo, const int sampledInputSize,
+         const std::vector<hash_map_compat<int, float> > *const charProbabilities,
+         int *const codePointBuf) {
+     ASSERT(sampledInputSize >= 0);
+     memset(codePointBuf, 0, sizeof(codePointBuf[0]) * MAX_WORD_LENGTH);
+
+     typedef hash_map_compat<int, float>::const_iterator ProbabilityIterator;
+     int outputLength = 0;
+     float totalLogProbability = 0.0f;
+     // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases.
+     int pointIndex = 0;
+     while (pointIndex < sampledInputSize && outputLength < MAX_WORD_LENGTH - 1) {
+         float bestLogProbability = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
+         int bestKey = NOT_AN_INDEX;
+         const ProbabilityIterator last = (*charProbabilities)[pointIndex].end();
+         for (ProbabilityIterator entry = (*charProbabilities)[pointIndex].begin();
+                 entry != last; ++entry) {
+             float candidate;
+             if (entry->first != NOT_AN_INDEX) {
+                 candidate = entry->second + ProximityInfoParams::DEMOTION_LOG_PROBABILITY;
+             } else {
+                 candidate = entry->second;
+             }
+             if (candidate < bestLogProbability) {
+                 bestLogProbability = candidate;
+                 bestKey = entry->first;
+             }
+         }
+         if (bestKey != NOT_AN_INDEX) {
+             codePointBuf[outputLength] = proximityInfo->getCodePointOf(bestKey);
+             ++outputLength;
+         }
+         totalLogProbability += bestLogProbability;
+         ++pointIndex;
+     }
+     codePointBuf[outputLength] = '\0';
+     return totalLogProbability;
+ }
+
+ // Logs the raw input points and the sampled points through AKLOGI, for debugging.
+ //
+ // - With DEBUG_GEO_FULL set, each sampled point is logged with its time.
+ // - With isGeometric set, each sampled point is additionally logged with its relative speed and
+ //   beeline speed; sampledSpeedRates and sampledBeelineSpeedPercentiles are read only in that
+ //   case.
+ // - Finally the raw and sampled coordinates are logged as ';'-separated lists.
+ //
+ // sampledTimes may be null; -1 is logged as the time in that case.
+ /* static */ void ProximityInfoStateUtils::dump(const bool isGeometric, const int inputSize,
+         const int *const inputXCoordinates, const int *const inputYCoordinates,
+         const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+         const std::vector<int> *const sampledInputYs,
+         const std::vector<int> *const sampledTimes,
+         const std::vector<float> *const sampledSpeedRates,
+         const std::vector<int> *const sampledBeelineSpeedPercentiles) {
+     if (DEBUG_GEO_FULL) {
+         for (int i = 0; i < sampledInputSize; ++i) {
+             AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, (*sampledInputXs)[i],
+                     (*sampledInputYs)[i], sampledTimes ? (*sampledTimes)[i] : -1);
+         }
+     }
+
+     std::stringstream originalX, originalY, sampledX, sampledY;
+     for (int i = 0; i < inputSize; ++i) {
+         originalX << inputXCoordinates[i];
+         originalY << inputYCoordinates[i];
+         if (i != inputSize - 1) {
+             originalX << ";";
+             originalY << ";";
+         }
+     }
+     AKLOGI("===== sampled points =====");
+     for (int i = 0; i < sampledInputSize; ++i) {
+         if (isGeometric) {
+             // Same null guard as the DEBUG_GEO_FULL log above: a missing time vector must not
+             // crash a debug dump.
+             const int sampledTime = sampledTimes ? (*sampledTimes)[i] : -1;
+             AKLOGI("%d: x = %d, y = %d, time = %d, relative speed = %.4f, beeline speed = %d",
+                     i, (*sampledInputXs)[i], (*sampledInputYs)[i], sampledTime,
+                     (*sampledSpeedRates)[i], (*sampledBeelineSpeedPercentiles)[i]);
+         }
+         sampledX << (*sampledInputXs)[i];
+         sampledY << (*sampledInputYs)[i];
+         if (i != sampledInputSize - 1) {
+             sampledX << ";";
+             sampledY << ";";
+         }
+     }
+     AKLOGI("original points:\n%s, %s,\nsampled points:\n%s, %s,\n",
+             originalX.str().c_str(), originalY.str().c_str(), sampledX.str().c_str(),
+             sampledY.str().c_str());
+ }
+} // namespace latinime
diff --git a/native/jni/src/proximity_info_state_utils.h b/native/jni/src/proximity_info_state_utils.h
new file mode 100644
index 000000000..a7f4a3425
--- /dev/null
+++ b/native/jni/src/proximity_info_state_utils.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_STATE_UTILS_H
+#define LATINIME_PROXIMITY_INFO_STATE_UTILS_H
+
+#include <bitset>
+#include <vector>
+
+#include "defines.h"
+#include "hash_map_compat.h"
+
+namespace latinime {
+class ProximityInfo;
+class ProximityInfoParams;
+
+ class ProximityInfoStateUtils {  // Every member is static; see DISALLOW_IMPLICIT_CONSTRUCTORS in the private section.
+ public:
+ typedef hash_map_compat<int, float> NearKeysDistanceMap;  // int -> float map; presumably key index -> distance, confirm in the .cpp
+ typedef std::bitset<MAX_KEY_COUNT_IN_A_KEYBOARD> NearKeycodesSet;  // one bit per key index (the .cpp tests it with key ids)
+
+ static int trimLastTwoTouchPoints(std::vector<int> *sampledInputXs,
+ std::vector<int> *sampledInputYs, std::vector<int> *sampledInputTimes,
+ std::vector<int> *sampledLengthCache, std::vector<int> *sampledInputIndice);
+ static int updateTouchPoints(const ProximityInfo *const proximityInfo,
+ const int maxPointToKeyLength, const int *const inputProximities,
+ const int *const inputXCoordinates, const int *const inputYCoordinates,
+ const int *const times, const int *const pointerIds, const int inputSize,
+ const bool isGeometric, const int pointerId, const int pushTouchPointStartIndex,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice);
+ static const int *getProximityCodePointsAt(const int *const inputProximities, const int index);
+ static int getPrimaryCodePointAt(const int *const inputProximities, const int index);
+ static void popInputData(std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice);
+ static float refreshSpeedRates(const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *const times, const int lastSavedInputSize,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledInputTimes,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<int> *const sampledInputIndice,
+ std::vector<float> *sampledSpeedRates, std::vector<float> *sampledDirections);
+ static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
+ const int inputSize, const int *const xCoordinates, const int *const yCoordinates,
+ const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const std::vector<int> *const inputIndice,
+ std::vector<int> *beelineSpeedPercentiles);
+ static float getDirection(const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int index0, const int index1);
+ static void updateAlignPointProbabilities(const float maxPointToKeyLength,  // on return charProbabilities holds -logf(probability) values
+ const int mostCommonKeyWidth, const int keyCount, const int start,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<float> *const sampledSpeedRates,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<float> *const SampledDistanceCache_G,
+ std::vector<NearKeycodesSet> *SampledNearKeySets,
+ std::vector<hash_map_compat<int, float> > *charProbabilities);
+ static void updateSampledSearchKeySets(const ProximityInfo *const proximityInfo,  // rebuilds per-point search key sets and their distinct code point lists
+ const int sampledInputSize, const int lastSavedInputSize,
+ const std::vector<int> *const sampledLengthCache,
+ const std::vector<NearKeycodesSet> *const SampledNearKeySets,
+ std::vector<NearKeycodesSet> *sampledSearchKeySets,
+ std::vector<std::vector<int> > *sampledSearchKeyVectors);
+ static float getPointToKeyByIdLength(const float maxPointToKeyLength,
+ const std::vector<float> *const SampledDistanceCache_G, const int keyCount,
+ const int inputIndex, const int keyId);
+ static void initGeometricDistanceInfos(const ProximityInfo *const proximityInfo,
+ const int sampledInputSize, const int lastSavedInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ std::vector<NearKeycodesSet> *SampledNearKeySets,
+ std::vector<float> *SampledDistanceCache_G);
+ static void initPrimaryInputWord(const int inputSize, const int *const inputProximities,
+ int *primaryInputWord);
+ static void initNormalizedSquaredDistances(const ProximityInfo *const proximityInfo,
+ const int inputSize, const int *inputXCoordinates, const int *inputYCoordinates,
+ const int *const inputProximities, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, int *normalizedSquaredDistances);
+ static void dump(const bool isGeometric, const int inputSize,  // debug only: logs raw and sampled points via AKLOGI
+ const int *const inputXCoordinates, const int *const inputYCoordinates,
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledTimes,
+ const std::vector<float> *const sampledSpeedRates,
+ const std::vector<int> *const sampledBeelineSpeedPercentiles);
+ static bool checkAndReturnIsContinuationPossible(const int inputSize,  // true iff every sampled point still matches the raw input at its recorded index
+ const int *const xCoordinates, const int *const yCoordinates, const int *const times,  // times may be null: time check is skipped
+ const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const sampledTimes,
+ const std::vector<int> *const sampledInputIndices);
+ // TODO: Move to most_probable_string_utils.h
+ static float getMostProbableString(const ProximityInfo *const proximityInfo,  // greedy per-point pick; returns the summed log probability
+ const int sampledInputSize,
+ const std::vector<hash_map_compat<int, float> > *const charProbabilities,
+ int *const codePointBuf);  // zeroed over MAX_WORD_LENGTH ints by the implementation
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoStateUtils);
+
+ static float updateNearKeysDistances(const ProximityInfo *const proximityInfo,
+ const float maxPointToKeyLength, const int x, const int y,
+ NearKeysDistanceMap *const currentNearKeysDistances);
+ static bool isPrevLocalMin(const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances);
+ static float getPointScore(const int mostCommonKeyWidth, const int x, const int y,
+ const int time, const bool lastPoint, const float nearest, const float sumAngle,
+ const NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs);
+ static bool pushTouchPoint(const ProximityInfo *const proximityInfo,
+ const int maxPointToKeyLength, const int inputIndex, const int nodeCodePoint, int x,
+ int y, const int time, const bool doSampling, const bool isLastPoint,
+ const float sumAngle, NearKeysDistanceMap *const currentNearKeysDistances,
+ const NearKeysDistanceMap *const prevNearKeysDistances,
+ const NearKeysDistanceMap *const prevPrevNearKeysDistances,
+ std::vector<int> *sampledInputXs, std::vector<int> *sampledInputYs,
+ std::vector<int> *sampledInputTimes, std::vector<int> *sampledLengthCache,
+ std::vector<int> *sampledInputIndice);
+ static float calculateBeelineSpeedRate(const int mostCommonKeyWidth, const float averageSpeed,
+ const int id, const int inputSize, const int *const xCoordinates,
+ const int *const yCoordinates, const int *times, const int sampledInputSize,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs,
+ const std::vector<int> *const inputIndice);
+ static float getPointAngle(const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int index);
+ static float getPointsAngle(const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int index0, const int index1,
+ const int index2);
+ static bool suppressCharProbabilities(const int mostCommonKeyWidth,  // returns false when the two points are too far apart along the stroke
+ const int sampledInputSize, const std::vector<int> *const lengthCache, const int index0,
+ const int index1, std::vector<hash_map_compat<int, float> > *charProbabilities);
+ static float calculateSquaredDistanceFromSweetSpotCenter(
+ const ProximityInfo *const proximityInfo, const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int keyIndex,
+ const int inputIndex);
+ static float calculateNormalizedSquaredDistance(const ProximityInfo *const proximityInfo,
+ const std::vector<int> *const sampledInputXs,
+ const std::vector<int> *const sampledInputYs, const int keyIndex, const int inputIndex);
+ };
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_STATE_UTILS_H
diff --git a/native/jni/src/proximity_info_utils.h b/native/jni/src/proximity_info_utils.h
new file mode 100644
index 000000000..71c97e325
--- /dev/null
+++ b/native/jni/src/proximity_info_utils.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROXIMITY_INFO_UTILS_H
+#define LATINIME_PROXIMITY_INFO_UTILS_H
+
+#include <cmath>
+
+#include "additional_proximity_chars.h"
+#include "char_utils.h"
+#include "defines.h"
+#include "geometry_utils.h"
+#include "hash_map_compat.h"
+
+namespace latinime {
+ // Static helpers for key lookup, proximity-character computation and small geometry routines.
+ // All members are static and the class declares DISALLOW_IMPLICIT_CONSTRUCTORS.
+ class ProximityInfoUtils {
+  public:
+     // Returns the key index stored in codeToKeyMap for the lower-cased code point c.
+     // Returns NOT_AN_INDEX when there are no keys (keyCount == 0), when c is NOT_A_CODE_POINT,
+     // or when the map has no entry for the lower-cased code point.
+     static AK_FORCE_INLINE int getKeyIndexOf(const int keyCount, const int c,
+             const hash_map_compat<int, int> *const codeToKeyMap) {
+         if (keyCount == 0) {
+             // We do not have the coordinate data
+             return NOT_AN_INDEX;
+         }
+         if (c == NOT_A_CODE_POINT) {
+             return NOT_AN_INDEX;
+         }
+         const int lowerCode = toLowerCase(c);
+         hash_map_compat<int, int>::const_iterator mapPos = codeToKeyMap->find(lowerCode);
+         if (mapPos != codeToKeyMap->end()) {
+             return mapPos->second;
+         }
+         return NOT_AN_INDEX;
+     }
+
+     // Fills inputProximities with one row of MAX_PROXIMITY_CHARS_SIZE code points per input
+     // point: row i starts at inputProximities[i * MAX_PROXIMITY_CHARS_SIZE] and is produced by
+     // calculateProximities() from inputCodes[i] and the point's coordinates.
+     // With DEBUG_PROXIMITY_CHARS set, every stored code point is logged afterwards.
+     static AK_FORCE_INLINE void initializeProximities(const int *const inputCodes,
+             const int *const inputXCoordinates, const int *const inputYCoordinates,
+             const int inputSize, const int *const keyXCoordinates,
+             const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights,
+             const int *const proximityCharsArray, const int cellHeight, const int cellWidth,
+             const int gridWidth, const int mostCommonKeyWidth, const int keyCount,
+             const char *const localeStr,
+             const hash_map_compat<int, int> *const codeToKeyMap, int *inputProximities) {
+         // TODO: Merge
+         for (int i = 0; i < inputSize; ++i) {
+             const int primaryKey = inputCodes[i];
+             const int x = inputXCoordinates[i];
+             const int y = inputYCoordinates[i];
+             int *proximities = &inputProximities[i * MAX_PROXIMITY_CHARS_SIZE];
+             calculateProximities(keyXCoordinates, keyYCoordinates, keyWidths, keyHeights,
+                     proximityCharsArray, cellHeight, cellWidth, gridWidth, mostCommonKeyWidth,
+                     keyCount, x, y, primaryKey, localeStr, codeToKeyMap, proximities);
+         }
+
+         if (DEBUG_PROXIMITY_CHARS) {
+             for (int i = 0; i < inputSize; ++i) {
+                 AKLOGI("---");
+                 for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE; ++j) {
+                     int proximityChar =
+                             inputProximities[i * MAX_PROXIMITY_CHARS_SIZE + j];
+                     // No-op use of the variable; presumably keeps it "used" when AKLOGI
+                     // expands to nothing - TODO confirm against defines.h.
+                     proximityChar += 0;
+                     AKLOGI("--- (%d)%c", i, proximityChar);
+                 }
+             }
+         }
+     }
+
+     // Maps a point to the start offset of its grid cell's row in the proximity chars array:
+     // (row * gridWidth + column) * MAX_PROXIMITY_CHARS_SIZE, using integer division of the
+     // coordinates by the cell size.
+     static AK_FORCE_INLINE int getStartIndexFromCoordinates(const int x, const int y,
+             const int cellHeight, const int cellWidth, const int gridWidth) {
+         return ((y / cellHeight) * gridWidth + (x / cellWidth)) * MAX_PROXIMITY_CHARS_SIZE;
+     }
+
+     // Squared Euclidean distance between (x1, y1) and (x2, y2).
+     static inline float getSquaredDistanceFloat(const float x1, const float y1, const float x2,
+             const float y2) {
+         return SQUARE_FLOAT(x1 - x2) + SQUARE_FLOAT(y1 - y2);
+     }
+
+     // Squared distance from (x, y) to the segment (x1, y1)-(x2, y2).
+     // When extend is false the projection is clamped to the segment's end points; when extend
+     // is true the distance to the infinite line through both points is returned.
+     // If both end points coincide, the squared distance to that single point is returned.
+     static inline float pointToLineSegSquaredDistanceFloat(const float x, const float y,
+             const float x1, const float y1, const float x2, const float y2, const bool extend) {
+         const float ray1x = x - x1;
+         const float ray1y = y - y1;
+         const float ray2x = x2 - x1;
+         const float ray2y = y2 - y1;
+
+         const float dotProduct = ray1x * ray2x + ray1y * ray2y;
+         const float lineLengthSqr = SQUARE_FLOAT(ray2x) + SQUARE_FLOAT(ray2y);
+         if (lineLengthSqr == 0.0f) {
+             // Degenerate segment: dividing by the zero length below would yield NaN.
+             return getSquaredDistanceFloat(x, y, x1, y1);
+         }
+         // Position of the projection along the segment: 0 at (x1, y1), 1 at (x2, y2).
+         const float projectionLengthSqr = dotProduct / lineLengthSqr;
+
+         float projectionX;
+         float projectionY;
+         if (!extend && projectionLengthSqr < 0.0f) {
+             projectionX = x1;
+             projectionY = y1;
+         } else if (!extend && projectionLengthSqr > 1.0f) {
+             projectionX = x2;
+             projectionY = y2;
+         } else {
+             projectionX = x1 + projectionLengthSqr * ray2x;
+             projectionY = y1 + projectionLengthSqr * ray2y;
+         }
+         return getSquaredDistanceFloat(x, y, projectionX, projectionY);
+     }
+
+     // Normal distribution N(u, sigma^2).
+     // The factors that depend only on sigma are computed once in the constructor.
+     struct NormalDistribution {
+      public:
+         NormalDistribution(const float u, const float sigma)
+             : mU(u), mSigma(sigma),
+               mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F * SQUARE_FLOAT(sigma))),
+               mPreComputedExponentPart(-1.0f / (2.0f * SQUARE_FLOAT(sigma))) {}
+
+         // Value of the density function at x.
+         float getProbabilityDensity(const float x) const {
+             const float shiftedX = x - mU;
+             return mPreComputedNonExpPart * expf(mPreComputedExponentPart * SQUARE_FLOAT(shiftedX));
+         }
+
+      private:
+         DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution);
+         const float mU; // mean value
+         const float mSigma; // standard deviation
+         const float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2)
+         const float mPreComputedExponentPart; // = -1 / (2 * sigma^2)
+     }; // struct NormalDistribution
+
+  private:
+     DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfoUtils);
+
+     // Tells whether (x, y) falls inside key keyId. The right edge is taken one unit past
+     // left + width. A negative keyId is treated as "on key" (returns true).
+     static bool isOnKey(const int *const keyXCoordinates, const int *const keyYCoordinates,
+             const int *const keyWidths, const int *keyHeights, const int keyId, const int x,
+             const int y) {
+         if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case
+         const int left = keyXCoordinates[keyId];
+         const int top = keyYCoordinates[keyId];
+         const int right = left + keyWidths[keyId] + 1;
+         const int bottom = top + keyHeights[keyId];
+         return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom;
+     }
+
+     // Fills proximities (MAX_PROXIMITY_CHARS_SIZE entries) for one input point:
+     //   1. the primary key,
+     //   2. code points from the point's grid cell that are >= KEYCODE_SPACE, differ from the
+     //      primary key, and whose key is under the point or closer than one most-common key
+     //      width,
+     //   3. if the locale has additional proximity chars for the primary key, a delimiter
+     //      (ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) followed by those not already listed.
+     // Remaining slots are set to NOT_A_CODE_POINT. If the buffer fills up, the function returns
+     // immediately (asserting under DEBUG_DICT) with no padding.
+     static AK_FORCE_INLINE void calculateProximities(const int *const keyXCoordinates,
+             const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights,
+             const int *const proximityCharsArray, const int cellHeight, const int cellWidth,
+             const int gridWidth, const int mostCommonKeyWidth, const int keyCount,
+             const int x, const int y, const int primaryKey, const char *const localeStr,
+             const hash_map_compat<int, int> *const codeToKeyMap, int *proximities) {
+         const int mostCommonKeyWidthSquare = mostCommonKeyWidth * mostCommonKeyWidth;
+         int insertPos = 0;
+         proximities[insertPos++] = primaryKey;
+         const int startIndex = getStartIndexFromCoordinates(x, y, cellHeight, cellWidth, gridWidth);
+         if (startIndex >= 0) {
+             for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
+                 const int c = proximityCharsArray[startIndex + i];
+                 if (c < KEYCODE_SPACE || c == primaryKey) {
+                     continue;
+                 }
+                 const int keyIndex = getKeyIndexOf(keyCount, c, codeToKeyMap);
+                 const bool onKey = isOnKey(keyXCoordinates, keyYCoordinates, keyWidths, keyHeights,
+                         keyIndex, x, y);
+                 const int distance = squaredLengthToEdge(keyXCoordinates, keyYCoordinates,
+                         keyWidths, keyHeights, keyIndex, x, y);
+                 if (onKey || distance < mostCommonKeyWidthSquare) {
+                     proximities[insertPos++] = c;
+                     if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
+                         if (DEBUG_DICT) {
+                             ASSERT(false);
+                         }
+                         return;
+                     }
+                 }
+             }
+             const int additionalProximitySize =
+                     AdditionalProximityChars::getAdditionalCharsSize(localeStr, primaryKey);
+             if (additionalProximitySize > 0) {
+                 proximities[insertPos++] = ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE;
+                 if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
+                     if (DEBUG_DICT) {
+                         ASSERT(false);
+                     }
+                     return;
+                 }
+
+                 const int *additionalProximityChars =
+                         AdditionalProximityChars::getAdditionalChars(localeStr, primaryKey);
+                 for (int j = 0; j < additionalProximitySize; ++j) {
+                     const int ac = additionalProximityChars[j];
+                     // Skip additional chars that are already present in the row.
+                     int k = 0;
+                     for (; k < insertPos; ++k) {
+                         if (ac == proximities[k]) {
+                             break;
+                         }
+                     }
+                     if (k < insertPos) {
+                         continue;
+                     }
+                     proximities[insertPos++] = ac;
+                     if (insertPos >= MAX_PROXIMITY_CHARS_SIZE) {
+                         if (DEBUG_DICT) {
+                             ASSERT(false);
+                         }
+                         return;
+                     }
+                 }
+             }
+         }
+         // Add a delimiter for the proximity characters
+         for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
+             proximities[i] = NOT_A_CODE_POINT;
+         }
+     }
+
+     // Squared distance from (x, y) to the nearest point of key keyId's rectangle; 0 when the
+     // point lies inside it. Returns MAX_VALUE_FOR_WEIGHTING for a negative keyId.
+     static int squaredLengthToEdge(const int *const keyXCoordinates,
+             const int *const keyYCoordinates, const int *const keyWidths, const int *keyHeights,
+             const int keyId, const int x, const int y) {
+         // NOT_A_ID is -1, but return whenever < 0 just in case
+         if (keyId < 0) return MAX_VALUE_FOR_WEIGHTING;
+         const int left = keyXCoordinates[keyId];
+         const int top = keyYCoordinates[keyId];
+         const int right = left + keyWidths[keyId];
+         const int bottom = top + keyHeights[keyId];
+         const int edgeX = x < left ? left : (x > right ? right : x);
+         const int edgeY = y < top ? top : (y > bottom ? bottom : y);
+         const int dx = x - edgeX;
+         const int dy = y - edgeY;
+         return dx * dx + dy * dy;
+     }
+ };
+} // namespace latinime
+#endif // LATINIME_PROXIMITY_INFO_UTILS_H
diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.cpp b/native/jni/src/suggest/gesture_suggest.cpp
index afbe0c5c3..fce5621d5 100644
--- a/native/jni/src/gesture/gesture_decoder_wrapper.cpp
+++ b/native/jni/src/suggest/gesture_suggest.cpp
@@ -14,9 +14,12 @@
* limitations under the License.
*/
-#include "gesture_decoder_wrapper.h"
+#include "gesture_suggest.h"
namespace latinime {
- IncrementalDecoderInterface *
- (*GestureDecoderWrapper::sGestureDecoderFactoryMethod)(int, int) = 0;
+ SuggestInterface *(*GestureSuggest::sGestureSuggestFactoryMethod)() = 0;
+
+ GestureSuggest::~GestureSuggest() {
+ delete mSuggestInterface; // Instance from the factory method; may be 0, which delete ignores
+ }
} // namespace latinime
diff --git a/native/jni/src/suggest/gesture_suggest.h b/native/jni/src/suggest/gesture_suggest.h
new file mode 100644
index 000000000..82c3a69ad
--- /dev/null
+++ b/native/jni/src/suggest/gesture_suggest.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_GESTURE_SUGGEST_H
+#define LATINIME_GESTURE_SUGGEST_H
+
+#include "defines.h"
+#include "suggest_interface.h"
+
+namespace latinime {
+
+class ProximityInfo;
+// SuggestInterface wrapper that delegates to an instance created by a registered factory method.
+class GestureSuggest : public SuggestInterface {
+ public:
+ GestureSuggest() : mSuggestInterface(getGestureSuggestInstance()) {}
+
+ virtual ~GestureSuggest();
+ // Forwards all arguments to the wrapped instance; returns 0 when no instance was created.
+ int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
+ int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint,
+ int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const {
+ if (!mSuggestInterface) {
+ return 0;
+ }
+ return mSuggestInterface->getSuggestions(pInfo, traverseSession, inputXs, inputYs, times,
+ pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies,
+ outputIndices, outputTypes);
+ }
+ // Registers the factory that constructors called afterwards use to create their instance.
+ static void setGestureSuggestFactoryMethod(SuggestInterface *(*factoryMethod)()) {
+ sGestureSuggestFactoryMethod = factoryMethod;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(GestureSuggest);
+ static SuggestInterface *getGestureSuggestInstance() {
+ if (!sGestureSuggestFactoryMethod) {
+ return 0; // No factory registered
+ }
+ return sGestureSuggestFactoryMethod();
+ }
+
+ static SuggestInterface *(*sGestureSuggestFactoryMethod)(); // Initialized to 0 in the .cpp
+ SuggestInterface *mSuggestInterface; // Deleted in the destructor; 0 if no factory was set
+};
+} // namespace latinime
+#endif // LATINIME_GESTURE_SUGGEST_H
diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/suggest/suggest_interface.h
index d1395aab9..0bb85d7e5 100644
--- a/native/jni/src/gesture/incremental_decoder_interface.h
+++ b/native/jni/src/suggest/suggest_interface.h
@@ -14,28 +14,25 @@
* limitations under the License.
*/
-#ifndef LATINIME_INCREMENTAL_DECODER_INTERFACE_H
-#define LATINIME_INCREMENTAL_DECODER_INTERFACE_H
+#ifndef LATINIME_SUGGEST_INTERFACE_H
+#define LATINIME_SUGGEST_INTERFACE_H
-#include <stdint.h>
#include "defines.h"
namespace latinime {
-class UnigramDictionary;
-class BigramDictionary;
class ProximityInfo;
-class IncrementalDecoderInterface {
+class SuggestInterface {
public:
- virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession,
- int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes,
- int inputSize, int commitPoint, unsigned short *outWords, int *frequencies,
- int *outputIndices, int *outputTypes) const = 0;
- IncrementalDecoderInterface() { };
- virtual ~IncrementalDecoderInterface() { };
+ virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
+ int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
+ int commitPoint, int *outWords, int *frequencies, int *outputIndices,
+ int *outputTypes) const = 0;
+ SuggestInterface() {}
+ virtual ~SuggestInterface() {}
private:
- DISALLOW_COPY_AND_ASSIGN(IncrementalDecoderInterface);
+ DISALLOW_COPY_AND_ASSIGN(SuggestInterface);
};
} // namespace latinime
-#endif // LATINIME_INCREMENTAL_DECODER_INTERFACE_H
+#endif // LATINIME_SUGGEST_INTERFACE_H
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.cpp b/native/jni/src/suggest/typing_suggest.cpp
index 8fcda6c9e..56bd5b69a 100644
--- a/native/jni/src/gesture/incremental_decoder_wrapper.cpp
+++ b/native/jni/src/suggest/typing_suggest.cpp
@@ -14,9 +14,12 @@
* limitations under the License.
*/
-#include "incremental_decoder_wrapper.h"
+#include "typing_suggest.h"
namespace latinime {
- IncrementalDecoderInterface *
- (*IncrementalDecoderWrapper::sIncrementalDecoderFactoryMethod)(int, int) = 0;
+ SuggestInterface *(*TypingSuggest::sTypingSuggestFactoryMethod)() = 0;
+
+ TypingSuggest::~TypingSuggest() {
+ delete mSuggestInterface; // Instance from the factory method; may be 0, which delete ignores
+ }
} // namespace latinime
diff --git a/native/jni/src/suggest/typing_suggest.h b/native/jni/src/suggest/typing_suggest.h
new file mode 100644
index 000000000..678037aa2
--- /dev/null
+++ b/native/jni/src/suggest/typing_suggest.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TYPING_SUGGEST_H
+#define LATINIME_TYPING_SUGGEST_H
+
+#include "defines.h"
+#include "suggest_interface.h"
+
+namespace latinime {
+
+class ProximityInfo;
+// SuggestInterface wrapper that delegates to an instance created by a registered factory method.
+class TypingSuggest : public SuggestInterface {
+ public:
+ TypingSuggest() : mSuggestInterface(getTypingSuggestInstance()) {}
+
+ virtual ~TypingSuggest();
+ // Forwards all arguments to the wrapped instance; returns 0 when no instance was created.
+ int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
+ int *times, int *pointerIds, int *inputCodePoints, int inputSize, int commitPoint,
+ int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const {
+ if (!mSuggestInterface) {
+ return 0;
+ }
+ return mSuggestInterface->getSuggestions(pInfo, traverseSession, inputXs, inputYs, times,
+ pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies,
+ outputIndices, outputTypes);
+ }
+ // Registers the factory that constructors called afterwards use to create their instance.
+ static void setTypingSuggestFactoryMethod(SuggestInterface *(*factoryMethod)()) {
+ sTypingSuggestFactoryMethod = factoryMethod;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TypingSuggest);
+ static SuggestInterface *getTypingSuggestInstance() {
+ if (!sTypingSuggestFactoryMethod) {
+ return 0; // No factory registered
+ }
+ return sTypingSuggestFactoryMethod();
+ }
+
+ static SuggestInterface *(*sTypingSuggestFactoryMethod)(); // Initialized to 0 in the .cpp
+ SuggestInterface *mSuggestInterface; // Deleted in the destructor; 0 if no factory was set
+};
+} // namespace latinime
+#endif // LATINIME_TYPING_SUGGEST_H
diff --git a/native/jni/src/suggest_utils.h b/native/jni/src/suggest_utils.h
new file mode 100644
index 000000000..aab9f7ba8
--- /dev/null
+++ b/native/jni/src/suggest_utils.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SUGGEST_UTILS_H
+#define LATINIME_SUGGEST_UTILS_H
+
+#include "defines.h"
+#include "proximity_info_params.h"
+
+namespace latinime {
+class SuggestUtils { // Scoring helper; the only member visible here is static
+ public:
+ static float getDistanceScalingFactor(const float normalizedSquaredDistance) {
+ if (normalizedSquaredDistance < 0.0f) {
+ return -1.0f; // Negative distances yield -1 instead of a scaling factor
+ }
+ // Promote or demote the score according to the distance from the sweet spot
+ static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; // value of the line at x == 0
+ static const float B = 1.0f; // value of the line at x == R1
+ static const float C = 0.5f; // value of the line at x == R2
+ static const float MIN = 0.3f; // lower bound of the returned factor
+ static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
+ static const float R2 = HALF_SCORE_SQUARED_RADIUS;
+ const float x = normalizedSquaredDistance / static_cast<float>(
+ ProximityInfoParams::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
+ const float factor = max((x < R1)
+ ? (A * (R1 - x) + B * x) / R1
+ : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); // keeps falling past R2 until MIN
+ // factor is a piecewise linear function like:
+ // A -_ .
+ // ^-_ .
+ // B \ .
+ // \_ .
+ // C ------------.
+ // .
+ // 0 R1 R2 .
+ return factor;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestUtils);
+};
+} // namespace latinime
+#endif // LATINIME_SUGGEST_UTILS_H
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index e72e7e3be..a8cc03b8d 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -17,6 +17,7 @@
#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
#define LATINIME_TERMINAL_ATTRIBUTES_H
+#include <stdint.h>
#include "binary_format.h"
namespace latinime {
@@ -29,10 +30,6 @@ namespace latinime {
class TerminalAttributes {
public:
class ShortcutIterator {
- const uint8_t *const mDict;
- int mPos;
- bool mHasNextShortcutTarget;
-
public:
ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags)
: mDict(dict), mPos(pos),
@@ -43,22 +40,25 @@ class TerminalAttributes {
return mHasNextShortcutTarget;
}
- // Gets the shortcut target itself as a uint16_t string. For parameters and return value
+ // Gets the shortcut target itself as an int string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
- // TODO: make the output an uint32_t* to handle the whole unicode range.
- inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
+ inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
- mHasNextShortcutTarget =
- 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
+ mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i;
- for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
+ for (i = 0; i < MAX_WORD_LENGTH; ++i) {
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
if (NOT_A_CODE_POINT == codePoint) break;
- outWord[i] = (uint16_t)codePoint;
+ outWord[i] = codePoint;
}
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
return i;
}
+
+ private:
+ const uint8_t *const mDict;
+ int mPos;
+ bool mHasNextShortcutTarget;
};
TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos)
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index e3649bd4b..0b18e78a3 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -14,7 +14,6 @@
* limitations under the License.
*/
-#include <cassert>
#include <cstring>
#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
@@ -41,14 +40,9 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[
{ 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
// TODO: check the header
-UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultiplier,
- int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags)
- : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
- TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
- // TODO : remove this variable.
- ROOT_POS(0),
- BYTES_IN_ONE_CHAR(sizeof(int)),
- MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) {
+UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags)
+ : DICT_ROOT(streamStart), ROOT_POS(0),
+ MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) {
if (DEBUG_DICT) {
AKLOGI("UnigramDictionary - constructor");
}
@@ -57,22 +51,17 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typed
UnigramDictionary::~UnigramDictionary() {
}
-static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
- return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
-}
-
-// TODO: This needs to take a const unsigned short* and not tinker with its contents
-static inline void addWord(unsigned short *word, int length, int frequency,
- WordsPriorityQueue *queue, int type) {
+// TODO: This needs to take a const int* and not tinker with its contents
+static void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, int type) {
queue->push(frequency, word, length, type);
}
// Return the replacement code point for a digraph, or 0 if none.
-int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int codesSize,
+int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize,
const digraph_t *const digraphs, const unsigned int digraphsSize) const {
// There can't be a digraph if we don't have at least 2 characters to examine
- if (i + 2 > codesSize) return false;
+ if (i + 2 > inputSize) return false;
// Search for the first char of some digraph
int lastDigraphIndex = -1;
@@ -93,7 +82,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons
// Mostly the same arguments as the non-recursive version, except:
// codes is the original value. It points to the start of the work buffer, and gets passed as is.
-// codesSize is the size of the user input (thus, it is the size of codesSrc).
+// inputSize is the size of the user input (thus, it is the size of codesSrc).
// codesDest is the current point in the work buffer.
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
// codesRemain is the remaining size in codesSrc.
@@ -105,6 +94,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
WordsPriorityQueuePool *queuePool,
const digraph_t *const digraphs, const unsigned int digraphsSize) const {
+ ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0]));
+ ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0]));
+ ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0]));
const int startIndex = static_cast<int>(codesDest - codesBuffer);
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
@@ -125,9 +117,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// Make i the index of the second char of the digraph for simplicity. Forgetting
// to do that results in an infinite recursion so take care!
++i;
- memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR);
- codesDest[(i - 1) * (BYTES_IN_ONE_CHAR / sizeof(codesDest[0]))] =
- replacementCodePoint;
+ memcpy(codesDest, codesSrc, i * sizeof(codesDest[0]));
+ codesDest[i - 1] = replacementCodePoint;
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1,
@@ -137,7 +128,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// Copy the second char of the digraph in place, then continue processing on
// the remaining part of the word.
// In our example, after "pru" in the buffer copy the "e", and continue on "fen"
- memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
+ memcpy(codesDest + i, codesSrc + i, sizeof(codesDest[0]));
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i,
@@ -153,13 +144,13 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// If the word contains several digraphs, we'll come it for the product of them.
// eg. if the word is "ueberpruefen" we'll test, in order, against
// "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".
- const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain;
+ const unsigned int remainingBytes = sizeof(codesDest[0]) * codesRemain;
if (0 != remainingBytes) {
memcpy(codesDest, codesSrc, remainingBytes);
memcpy(&xCoordinatesBuffer[startIndex], &xcoordinates[codesBufferSize - codesRemain],
- sizeof(int) * codesRemain);
+ sizeof(xCoordinatesBuffer[0]) * codesRemain);
memcpy(&yCoordinatesBuffer[startIndex], &ycoordinates[codesBufferSize - codesRemain],
- sizeof(int) * codesRemain);
+ sizeof(yCoordinatesBuffer[0]) * codesRemain);
}
getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
@@ -171,58 +162,54 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
// in bigram_dictionary.cpp
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
+ const int *ycoordinates, const int *inputCodePoints, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
- int *outputTypes) const {
-
- WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
+ const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
+ WordsPriorityQueuePool queuePool(MAX_RESULTS, SUB_QUEUE_MAX_WORDS);
queuePool.clearAll();
Correction masterCorrection;
masterCorrection.resetCorrection();
if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
{ // Incrementally tune the word and try all possibilities
- int codesBuffer[getCodesBufferSize(codes, codesSize)];
- int xCoordinatesBuffer[codesSize];
- int yCoordinatesBuffer[codesSize];
+ int codesBuffer[sizeof(*inputCodePoints) * inputSize];
+ int xCoordinatesBuffer[inputSize];
+ int yCoordinatesBuffer[inputSize];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
- useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection,
- &queuePool, GERMAN_UMLAUT_DIGRAPHS,
- sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
+ xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
+ useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
+ &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS));
} else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
- int codesBuffer[getCodesBufferSize(codes, codesSize)];
- int xCoordinatesBuffer[codesSize];
- int yCoordinatesBuffer[codesSize];
+ int codesBuffer[sizeof(*inputCodePoints) * inputSize];
+ int xCoordinatesBuffer[inputSize];
+ int yCoordinatesBuffer[inputSize];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
- useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection,
- &queuePool, FRENCH_LIGATURES_DIGRAPHS,
- sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
+ xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
+ useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
+ &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS));
} else { // Normal processing
- getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
+ getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize,
bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
}
PROF_START(20);
if (DEBUG_DICT) {
float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
- masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0);
+ masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0);
ns += 0;
AKLOGI("Max normalized score = %f", ns);
}
const int suggestedWordsCount =
queuePool.getMasterQueue()->outputSuggestions(masterCorrection.getPrimaryInputWord(),
- codesSize, frequencies, outWords, outputTypes);
+ inputSize, frequencies, outWords, outputTypes);
if (DEBUG_DICT) {
float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
- masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0);
+ masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0);
ns += 0;
AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
- short unsigned int *w = outWords + j * MAX_WORD_LENGTH;
+ int *w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
(void)s; // To suppress compiler warning
@@ -234,19 +221,18 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
return suggestedWordsCount;
}
-void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool *queuePool) const {
-
+void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *inputCodePoints, const int inputSize,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
+ const {
PROF_OPEN;
PROF_START(0);
PROF_END(0);
PROF_START(1);
- getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter,
- useFullEditDistance, inputSize, correction, queuePool);
+ getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, bigramMap,
+ bigramFilter, useFullEditDistance, inputSize, correction, queuePool);
PROF_END(1);
PROF_START(2);
@@ -271,7 +257,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
// Multiple word suggestions
if (SUGGEST_MULTIPLE_WORDS
&& inputSize >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
- getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints,
useFullEditDistance, inputSize, correction, queuePool,
hasAutoCorrectionCandidate);
}
@@ -288,7 +274,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (queue->size() > 0) {
WordsPriorityQueue::SuggestedWord *sw = queue->top();
const int score = sw->mScore;
- const unsigned short *word = sw->mWord;
+ const int *word = sw->mWord;
const int wordLength = sw->mWordLength;
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
correction->getPrimaryInputWord(), i, word, wordLength, score);
@@ -307,15 +293,13 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
Correction *correction) const {
if (DEBUG_DICT) {
AKLOGI("initSuggest");
- DUMP_WORD_INT(codes, inputSize);
+ DUMP_WORD(codes, inputSize);
}
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputSize, maxDepth);
}
-static const char SPACE = ' ';
-
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
@@ -374,15 +358,15 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
}
}
-inline void UnigramDictionary::onTerminal(const int probability,
- const TerminalAttributes& terminalAttributes, Correction *correction,
+void UnigramDictionary::onTerminal(const int probability,
+ const TerminalAttributes &terminalAttributes, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex) const {
const int inputIndex = correction->getInputIndex();
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
int wordLength;
- unsigned short *wordPointer;
+ int *wordPointer;
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
@@ -400,8 +384,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
// Please note that the shortcut candidates will be added to the master queue only.
- TerminalAttributes::ShortcutIterator iterator =
- terminalAttributes.getShortcutIterator();
+ TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
while (iterator.hasNextShortcutTarget()) {
// TODO: addWord only supports weak ordering, meaning we have no means
// to control the order of the shortcuts relative to one another or to the word.
@@ -410,10 +393,10 @@ inline void UnigramDictionary::onTerminal(const int probability,
// so that the insert order is protected inside the queue for words
// with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word.
- uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ int shortcutTarget[MAX_WORD_LENGTH];
int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
- MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
+ MAX_WORD_LENGTH, shortcutTarget, &shortcutFrequency);
int shortcutScore;
int kind;
if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
@@ -450,7 +433,7 @@ int UnigramDictionary::getSubStringSuggestion(
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const {
+ int *wordLengthArray, int *outputWord, int *outputWordLength) const {
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_ABORT;
}
@@ -493,13 +476,13 @@ int UnigramDictionary::getSubStringSuggestion(
// TODO: Remove the safety net above //
//////////////////////////////////////////////
- unsigned short *tempOutputWord = 0;
+ int *tempOutputWord = 0;
int nextWordLength = 0;
// TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputSize, correction);
- unsigned short word[MAX_WORD_LENGTH_INTERNAL];
+ int word[MAX_WORD_LENGTH];
int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, correction, word);
if (freq > 0) {
@@ -570,7 +553,7 @@ int UnigramDictionary::getSubStringSuggestion(
if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_SKIP;
}
- outputWord[tempOutputWordLength] = SPACE;
+ outputWord[tempOutputWordLength] = KEYCODE_SPACE;
if (outputWordLength) {
++*outputWordLength;
}
@@ -598,7 +581,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const bool useFullEditDistance, const int inputSize, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
const int startInputPos, const int startWordIndex, const int outputWordLength,
- int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
+ int *freqArray, int *wordLengthArray, int *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index
return;
@@ -684,7 +667,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
}
// Allocating fixed length array on stack
- unsigned short outputWord[MAX_WORD_LENGTH];
+ int outputWord[MAX_WORD_LENGTH];
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
const int outputWordLength = 0;
@@ -698,12 +681,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
-inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
- const int inputSize, Correction *correction, unsigned short *word) const {
- uint16_t inWord[inputSize];
-
+int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, const int inputSize,
+ Correction *correction, int *word) const {
+ int inWord[inputSize];
for (int i = 0; i < inputSize; ++i) {
- inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
+ inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
}
return getMostFrequentWordLikeInner(inWord, inputSize, word);
}
@@ -721,14 +703,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
- const uint8_t *const root, const int startPos, const uint16_t *const inWord,
- const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
+ const uint8_t *const root, const int startPos, const int *const inWord,
+ const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
int *outPos) {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
- int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- int32_t baseChar = toBaseLowerCase(codePoint);
- const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
+ int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ int baseChar = toBaseLowerCase(codePoint);
+ const int wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -759,8 +741,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
// It will compare the frequency to the max frequency, and if greater, will
// copy the word into the output buffer. In output value maxFreq, it will
// write the new maximum frequency if it changed.
-static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
- short unsigned int *outWord, int *maxFreq) {
+static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
+ int *maxFreq) {
if (freq > *maxFreq) {
for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q];
@@ -772,15 +754,15 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
// Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents.
-int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
- const int inputSize, short unsigned int *outWord) const {
- int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
+int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
+ int *outWord) const {
+ int newWord[MAX_WORD_LENGTH];
int depth = 0;
int maxFreq = -1;
const uint8_t *const root = DICT_ROOT;
- int stackChildCount[MAX_WORD_LENGTH_INTERNAL];
- int stackInputIndex[MAX_WORD_LENGTH_INTERNAL];
- int stackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
+ int stackChildCount[MAX_WORD_LENGTH];
+ int stackInputIndex[MAX_WORD_LENGTH];
+ int stackSiblingPos[MAX_WORD_LENGTH];
int startPos = 0;
stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
@@ -834,7 +816,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
return maxFreq;
}
-int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const {
+int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
const uint8_t *const root = DICT_ROOT;
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
false /* forceLowerCaseSearch */);
@@ -859,8 +841,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
}
// TODO: remove this function.
-int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
- int length) const {
+int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
return -1;
}
@@ -878,7 +859,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// there aren't any more nodes at this level, it merely returns the address of the first byte after
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent.
-inline bool UnigramDictionary::processCurrentNode(const int initialPos,
+bool UnigramDictionary::processCurrentNode(const int initialPos,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
WordsPriorityQueuePool *queuePool, const int currentWordIndex) const {
@@ -906,8 +887,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children.
- int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
- assert(NOT_A_CODE_POINT != c);
+ int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
+ ASSERT(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different
// node each time. To do that, we will process characters in this node in order until
@@ -920,7 +901,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children.
- const int32_t nextc = hasMultipleChars
+ const int nextc = hasMultipleChars
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this nodes, then this virtual node is not a terminal.
@@ -1000,7 +981,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// Now we finished processing this node, and we want to traverse children. If there are no
// children, we can't come here.
- assert(BinaryFormat::hasChildrenInFlags(flags));
+ ASSERT(BinaryFormat::hasChildrenInFlags(flags));
// If this node was a terminal it still has the frequency under the pointer (it may have been
// read, but not skipped - see readFrequencyWithoutMovingPointer).
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index 57129bb07..502bf4790 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -39,36 +39,34 @@ class UnigramDictionary {
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
- UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler,
- int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
- int getFrequency(const int32_t *const inWord, const int length) const;
- int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
+ UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags);
+ int getFrequency(const int *const inWord, const int length) const;
+ int getBigramPosition(int pos, int *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
+ const int *ycoordinates, const int *inputCodePoints, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
+ const bool useFullEditDistance, int *outWords, int *frequencies,
int *outputTypes) const;
virtual ~UnigramDictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary);
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int inputSize,
+ const int *ycoordinates, const int *inputCodePoints, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool) const;
- int getDigraphReplacement(const int *codes, const int i, const int codesSize,
+ int getDigraphReplacement(const int *codes, const int i, const int inputSize,
const digraph_t *const digraphs, const unsigned int digraphsSize) const;
- void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
- int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, const int *codesSrc, const int codesRemain,
- const int currentDepth, int *codesDest, Correction *correction,
- WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs,
- const unsigned int digraphsSize) const;
+ void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer,
+ int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap,
+ const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc,
+ const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
+ WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs,
+ const unsigned int digraphsSize) const;
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
+ const int *ycoordinates, const int *codes, const int inputSize,
Correction *correction) const;
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
@@ -79,12 +77,11 @@ class UnigramDictionary {
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion,
const int maxErrors, const int currentWordIndex) const;
- void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputSize,
- Correction *correction, WordsPriorityQueuePool *queuePool,
+ void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate) const;
- void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
+ void onTerminal(const int freq, const TerminalAttributes &terminalAttributes,
Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex) const;
// Process a node by considering proximity, missing and excessive character
@@ -93,32 +90,25 @@ class UnigramDictionary {
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
- Correction *correction, unsigned short *word) const;
- int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
- short unsigned int *outWord) const;
- int getSubStringSuggestion(
- ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
- const int *codes, const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool *queuePool, const int inputSize,
+ Correction *correction, int *word) const;
+ int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
+ int *outWord) const;
+ int getSubStringSuggestion(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ Correction *correction, WordsPriorityQueuePool *queuePool, const int inputSize,
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
- const int inputWordStartPos, const int inputWordLength,
- const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
- void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputSize,
- Correction *correction, WordsPriorityQueuePool *queuePool,
+ const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos,
+ const bool isSpaceProximity, int *freqArray, int *wordLengthArray, int *outputWord,
+ int *outputWordLength) const;
+ void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int *wordLengthArray,
- unsigned short *outputWord) const;
+ int *outputWord) const;
const uint8_t *const DICT_ROOT;
- const int MAX_WORD_LENGTH;
- const int MAX_WORDS;
- const int TYPED_LETTER_MULTIPLIER;
- const int FULL_WORD_MULTIPLIER;
const int ROOT_POS;
- const unsigned int BYTES_IN_ONE_CHAR;
const int MAX_DIGRAPH_SEARCH_DEPTH;
const int FLAGS;
diff --git a/native/jni/src/words_priority_queue.cpp b/native/jni/src/words_priority_queue.cpp
new file mode 100644
index 000000000..7e18d0f87
--- /dev/null
+++ b/native/jni/src/words_priority_queue.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "words_priority_queue.h"
+
+namespace latinime {
+
+int WordsPriorityQueue::outputSuggestions(const int *before, const int beforeLength, // Drains queued words into the output arrays.
+ int *frequencies, int *outputCodePoints, int* outputTypes) { // Returns the number of buffered words (size below).
+ mHighestSuggestedWord = 0; // clear the cached highest-word pointer
+ const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size())); // words to emit, capped at MAX_WORDS
+ SuggestedWord *swBuffer[size]; // runtime-sized array: a compiler extension, not standard C++
+ int index = size - 1; // swBuffer is filled from its last slot backwards
+ while (!mSuggestions.empty() && index >= 0) {
+ SuggestedWord *sw = mSuggestions.top(); // NOTE(review): push() treats top() as the minimum score -- confirm in wordComparator
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ AKLOGI("dump word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ swBuffer[index] = sw;
+ mSuggestions.pop();
+ --index;
+ }
+ if (size >= 2) { // with two or more words, move the best normalized score to slot 0
+ SuggestedWord *nsMaxSw = 0;
+ int maxIndex = 0;
+ float maxNs = 0;
+ for (int i = 0; i < size; ++i) {
+ SuggestedWord *tempSw = swBuffer[i];
+ if (!tempSw) { // skip null slots
+ continue;
+ }
+ const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0); // null out-params: only the score is needed
+ if (tempNs >= maxNs) { // >= lets a later entry win a tie
+ maxNs = tempNs;
+ maxIndex = i;
+ nsMaxSw = tempSw;
+ }
+ }
+ if (maxIndex > 0 && nsMaxSw) {
+ memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(swBuffer[0])); // shift slots [0, maxIndex) up by one, overwriting slot maxIndex
+ swBuffer[0] = nsMaxSw;
+ }
+ }
+ for (int i = 0; i < size; ++i) { // copy each buffered word into the caller's arrays
+ SuggestedWord *sw = swBuffer[i];
+ if (!sw) {
+ AKLOGE("SuggestedWord is null %d", i);
+ continue; // output slot i is left unwritten in this case
+ }
+ const int wordLength = sw->mWordLength;
+ int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH; // word i starts at offset i * MAX_WORD_LENGTH
+ frequencies[i] = sw->mScore;
+ outputTypes[i] = sw->mType;
+ memcpy(targetAddress, sw->mWord, wordLength * sizeof(targetAddress[0]));
+ if (wordLength < MAX_WORD_LENGTH) { // zero-terminate only when the word does not fill its slot
+ targetAddress[wordLength] = 0;
+ }
+ sw->mUsed = false; // release the entry; getFreeSuggestedWord() hands out entries whose mUsed is false
+ }
+ return size;
+}
+} // namespace latinime
diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h
index 19efa5da3..54e8007a2 100644
--- a/native/jni/src/words_priority_queue.h
+++ b/native/jni/src/words_priority_queue.h
@@ -27,47 +27,45 @@ namespace latinime {
class WordsPriorityQueue {
public:
- class SuggestedWord {
- public:
+ struct SuggestedWord {
int mScore;
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mWord[MAX_WORD_LENGTH];
int mWordLength;
bool mUsed;
int mType;
- void setParams(int score, unsigned short *word, int wordLength, int type) {
+ void setParams(int score, int *word, int wordLength, int type) {
mScore = score;
mWordLength = wordLength;
- memcpy(mWord, word, sizeof(unsigned short) * wordLength);
+ memcpy(mWord, word, sizeof(mWord[0]) * wordLength);
mUsed = true;
mType = type;
}
};
- WordsPriorityQueue(int maxWords, int maxWordLength)
- : mSuggestions(), MAX_WORDS(static_cast<unsigned int>(maxWords)),
- MAX_WORD_LENGTH(static_cast<unsigned int>(maxWordLength)),
- mSuggestedWords(new SuggestedWord[maxWordLength]), mHighestSuggestedWord(0) {
- for (int i = 0; i < maxWordLength; ++i) {
+ WordsPriorityQueue(int maxWords)
+ : mSuggestions(), MAX_WORDS(maxWords),
+ mSuggestedWords(new SuggestedWord[MAX_WORD_LENGTH]), mHighestSuggestedWord(0) {
+ for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
mSuggestedWords[i].mUsed = false;
}
}
- virtual ~WordsPriorityQueue() {
+ // Non virtual inline destructor -- never inherit this class
+ AK_FORCE_INLINE ~WordsPriorityQueue() {
delete[] mSuggestedWords;
}
- void push(int score, unsigned short *word, int wordLength, int type) {
+ void push(int score, int *word, int wordLength, int type) {
SuggestedWord *sw = 0;
- if (mSuggestions.size() >= MAX_WORDS) {
+ if (size() >= MAX_WORDS) {
sw = mSuggestions.top();
const int minScore = sw->mScore;
if (minScore >= score) {
return;
- } else {
- sw->mUsed = false;
- mSuggestions.pop();
}
+ sw->mUsed = false;
+ mSuggestions.pop();
}
if (sw == 0) {
sw = getFreeSuggestedWord(score, word, wordLength, type);
@@ -88,74 +86,17 @@ class WordsPriorityQueue {
}
}
- SuggestedWord *top() {
+ SuggestedWord *top() const {
if (mSuggestions.empty()) return 0;
SuggestedWord *sw = mSuggestions.top();
return sw;
}
- int outputSuggestions(const unsigned short *before, const int beforeLength,
- int *frequencies, unsigned short *outputChars, int* outputTypes) {
- mHighestSuggestedWord = 0;
- const unsigned int size = min(
- MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
- SuggestedWord *swBuffer[size];
- int index = size - 1;
- while (!mSuggestions.empty() && index >= 0) {
- SuggestedWord *sw = mSuggestions.top();
- if (DEBUG_WORDS_PRIORITY_QUEUE) {
- AKLOGI("dump word. %d", sw->mScore);
- DUMP_WORD(sw->mWord, sw->mWordLength);
- }
- swBuffer[index] = sw;
- mSuggestions.pop();
- --index;
- }
- if (size >= 2) {
- SuggestedWord *nsMaxSw = 0;
- unsigned int maxIndex = 0;
- float maxNs = 0;
- for (unsigned int i = 0; i < size; ++i) {
- SuggestedWord *tempSw = swBuffer[i];
- if (!tempSw) {
- continue;
- }
- const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0);
- if (tempNs >= maxNs) {
- maxNs = tempNs;
- maxIndex = i;
- nsMaxSw = tempSw;
- }
- }
- if (maxIndex > 0 && nsMaxSw) {
- memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(SuggestedWord *));
- swBuffer[0] = nsMaxSw;
- }
- }
- for (unsigned int i = 0; i < size; ++i) {
- SuggestedWord *sw = swBuffer[i];
- if (!sw) {
- AKLOGE("SuggestedWord is null %d", i);
- continue;
- }
- const unsigned int wordLength = sw->mWordLength;
- unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH;
- frequencies[i] = sw->mScore;
- outputTypes[i] = sw->mType;
- memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short));
- if (wordLength < MAX_WORD_LENGTH) {
- targetAddress[wordLength] = 0;
- }
- sw->mUsed = false;
- }
- return size;
- }
-
int size() const {
- return mSuggestions.size();
+ return static_cast<int>(mSuggestions.size());
}
- void clear() {
+ AK_FORCE_INLINE void clear() {
mHighestSuggestedWord = 0;
while (!mSuggestions.empty()) {
SuggestedWord *sw = mSuggestions.top();
@@ -168,22 +109,25 @@ class WordsPriorityQueue {
}
}
- void dumpTopWord() {
+ AK_FORCE_INLINE void dumpTopWord() const {
if (size() <= 0) {
return;
}
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
}
- float getHighestNormalizedScore(const unsigned short *before, const int beforeLength,
- unsigned short **outWord, int *outScore, int *outLength) {
+ AK_FORCE_INLINE float getHighestNormalizedScore(const int *before, const int beforeLength,
+ int **outWord, int *outScore, int *outLength) const {
if (!mHighestSuggestedWord) {
- return 0.0;
+ return 0.0f;
}
- return getNormalizedScore(
- mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength);
+ return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
+ outLength);
}
+ int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
+ int *outputCodePoints, int* outputTypes);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue);
struct wordComparator {
@@ -192,9 +136,8 @@ class WordsPriorityQueue {
}
};
- SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word,
- int wordLength, int type) {
- for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
+ SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) const {
+ for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
if (!mSuggestedWords[i].mUsed) {
mSuggestedWords[i].setParams(score, word, wordLength, type);
return &mSuggestedWords[i];
@@ -203,10 +146,10 @@ class WordsPriorityQueue {
return 0;
}
- static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before,
- const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) {
+ static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
+ int **outWord, int *outScore, int *outLength) {
const int score = sw->mScore;
- unsigned short *word = sw->mWord;
+ int *word = sw->mWord;
const int wordLength = sw->mWordLength;
if (outScore) {
*outScore = score;
@@ -217,15 +160,14 @@ class WordsPriorityQueue {
if (outLength) {
*outLength = wordLength;
}
- return Correction::RankingAlgorithm::calcNormalizedScore(
- before, beforeLength, word, wordLength, score);
+ return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
+ wordLength, score);
}
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
wordComparator> Suggestions;
Suggestions mSuggestions;
- const unsigned int MAX_WORDS;
- const unsigned int MAX_WORD_LENGTH;
+ const int MAX_WORDS;
SuggestedWord *mSuggestedWords;
SuggestedWord *mHighestSuggestedWord;
};
diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h
index 2d52903e0..2cd210a05 100644
--- a/native/jni/src/words_priority_queue_pool.h
+++ b/native/jni/src/words_priority_queue_pool.h
@@ -17,26 +17,26 @@
#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
-#include <cassert>
+#include "defines.h"
#include "words_priority_queue.h"
namespace latinime {
class WordsPriorityQueuePool {
public:
- WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength)
+ WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords)
// Note: using placement new() requires the caller to call the destructor explicitly.
- : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue(
- mainQueueMaxWords, maxWordLength)) {
+ : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords)) {
for (int i = 0, subQueueBufOffset = 0;
i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) {
mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
- WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ WordsPriorityQueue(subQueueMaxWords);
}
}
- virtual ~WordsPriorityQueuePool() {
+ // Non virtual inline destructor -- never inherit this class
+ ~WordsPriorityQueuePool() {
// Note: these explicit calls to the destructor match the calls to placement new() above.
if (mMasterQueue) mMasterQueue->~WordsPriorityQueue();
for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i) {
@@ -44,17 +44,17 @@ class WordsPriorityQueuePool {
}
}
- WordsPriorityQueue *getMasterQueue() {
+ WordsPriorityQueue *getMasterQueue() const {
return mMasterQueue;
}
- WordsPriorityQueue *getSubQueue(const int wordIndex, const int inputWordLength) {
+ WordsPriorityQueue *getSubQueue(const int wordIndex, const int inputWordLength) const {
if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
return 0;
}
if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) {
if (DEBUG_WORDS_PRIORITY_QUEUE) {
- assert(false);
+ ASSERT(false);
}
return 0;
}
@@ -68,7 +68,7 @@ class WordsPriorityQueuePool {
}
}
- inline void clearSubQueue(const int wordIndex) {
+ AK_FORCE_INLINE void clearSubQueue(const int wordIndex) {
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
WordsPriorityQueue *queue = getSubQueue(wordIndex, i);
if (queue) {
@@ -77,7 +77,7 @@ class WordsPriorityQueuePool {
}
}
- void dumpSubQueue1TopSuggestions() {
+ void dumpSubQueue1TopSuggestions() const {
AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
getSubQueue(0, i)->dumpTopWord();