diff options
Diffstat (limited to 'native')
39 files changed, 1460 insertions, 938 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index d53757fd4..9f9958377 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -15,17 +15,18 @@ LOCAL_PATH := $(call my-dir) ############ some local flags -# If you change any of those flags, you need to rebuild both libjni_latinime_static -# and the shared library. -#FLAG_DBG := true -#FLAG_DO_PROFILE := true +# If you change any of those flags, you need to rebuild both libjni_latinime_common_static +# and the shared library that uses libjni_latinime_common_static. +FLAG_DBG ?= false +FLAG_DO_PROFILE ?= false ###################################### include $(CLEAR_VARS) LATIN_IME_SRC_DIR := src +LATIN_IME_SRC_FULLPATH_DIR := $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR) -LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR) +LOCAL_C_INCLUDES += $(LATIN_IME_SRC_FULLPATH_DIR) $(LATIN_IME_SRC_FULLPATH_DIR)/gesture LOCAL_CFLAGS += -Werror -Wall @@ -35,6 +36,7 @@ LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function LATIN_IME_JNI_SRC_FILES := \ com_android_inputmethod_keyboard_ProximityInfo.cpp \ com_android_inputmethod_latin_BinaryDictionary.cpp \ + com_android_inputmethod_latin_NativeUtils.cpp \ jni_common.cpp LATIN_IME_CORE_SRC_FILES := \ @@ -45,11 +47,14 @@ LATIN_IME_CORE_SRC_FILES := \ correction.cpp \ dictionary.cpp \ proximity_info.cpp \ - unigram_dictionary.cpp + proximity_info_state.cpp \ + unigram_dictionary.cpp \ + gesture/gesture_decoder_wrapper.cpp \ + gesture/incremental_decoder_wrapper.cpp LOCAL_SRC_FILES := \ $(LATIN_IME_JNI_SRC_FILES) \ - $(addprefix $(LATIN_IME_SRC_DIR)/,$(LATIN_IME_CORE_SRC_FILES)) + $(addprefix $(LATIN_IME_SRC_DIR)/, $(LATIN_IME_CORE_SRC_FILES)) ifeq ($(FLAG_DO_PROFILE), true) $(warning Making profiling version of native library) @@ -61,50 +66,42 @@ ifeq ($(FLAG_DBG), true) endif # FLAG_DBG endif # FLAG_DO_PROFILE -LOCAL_MODULE := libjni_latinime_static +LOCAL_MODULE := libjni_latinime_common_static LOCAL_MODULE_TAGS := optional -ifdef HISTORICAL_NDK_VERSIONS_ROOT # 
In the platform build system -include external/stlport/libstlport.mk -else # In the NDK build system -LOCAL_C_INCLUDES += external/stlport/stlport bionic -endif +LOCAL_NDK_VERSION := 8 +LOCAL_SDK_VERSION := 14 +LOCAL_NDK_STL_VARIANT := stlport_static include $(BUILD_STATIC_LIBRARY) - ###################################### include $(CLEAR_VARS) # All code in LOCAL_WHOLE_STATIC_LIBRARIES will be built into this shared library. -LOCAL_WHOLE_STATIC_LIBRARIES := libjni_latinime_static - -ifdef HISTORICAL_NDK_VERSIONS_ROOT # In the platform build system -LOCAL_SHARED_LIBRARIES := libstlport -else # In the NDK build system -LOCAL_SHARED_LIBRARIES := libstlport_static -endif +LOCAL_WHOLE_STATIC_LIBRARIES := libjni_latinime_common_static ifeq ($(FLAG_DO_PROFILE), true) $(warning Making profiling version of native library) - LOCAL_SHARED_LIBRARIES += libcutils libutils + LOCAL_SHARED_LIBRARIES += liblog else # FLAG_DO_PROFILE ifeq ($(FLAG_DBG), true) $(warning Making debug version of native library) - LOCAL_SHARED_LIBRARIES += libcutils libutils + LOCAL_SHARED_LIBRARIES += liblog endif # FLAG_DBG endif # FLAG_DO_PROFILE LOCAL_MODULE := libjni_latinime LOCAL_MODULE_TAGS := optional -ifdef HISTORICAL_NDK_VERSIONS_ROOT # In the platform build system -include external/stlport/libstlport.mk -endif +LOCAL_NDK_VERSION := 8 +LOCAL_SDK_VERSION := 14 +LOCAL_NDK_STL_VARIANT := stlport_static include $(BUILD_SHARED_LIBRARY) #################### Clean up the tmp vars LATIN_IME_CORE_SRC_FILES := LATIN_IME_JNI_SRC_FILES := +LATIN_IME_GESTURE_IMPL_SRC_FILES := LATIN_IME_SRC_DIR := -TARGETING_UNBUNDLED_FROYO := +LATIN_IME_SRC_FULLPATH_DIR := diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp index 9eb437c06..0a8aabf4f 100644 --- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp +++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp @@ -1,19 +1,19 @@ /* -** -** Copyright 2011, 
The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #define LOG_TAG "LatinIME: jni: ProximityInfo" @@ -85,5 +85,4 @@ int register_ProximityInfo(JNIEnv *env) { return registerNativeMethods(env, kClassPathName, sKeyboardMethods, sizeof(sKeyboardMethods) / sizeof(sKeyboardMethods[0])); } - } // namespace latinime diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h index 4a1e83b09..f5ccf2053 100644 --- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h +++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.h @@ -1,19 +1,19 @@ /* -** -** Copyright 2011, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #ifndef _COM_ANDROID_INPUTMETHOD_KEYBOARD_PROXIMITYINFO_H #define _COM_ANDROID_INPUTMETHOD_KEYBOARD_PROXIMITYINFO_H @@ -24,6 +24,5 @@ namespace latinime { int register_ProximityInfo(JNIEnv *env); -} - +} // namespace latinime #endif // _COM_ANDROID_INPUTMETHOD_KEYBOARD_PROXIMITYINFO_H diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index d10dc962e..8725b0c9d 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -1,19 +1,19 @@ /* -** -** Copyright 2009, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2009, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #define LOG_TAG "LatinIME: jni: BinaryDictionary" @@ -42,11 +42,12 @@ namespace latinime { -void releaseDictBuf(void* dictBuf, const size_t length, int fd); +void releaseDictBuf(void *dictBuf, const size_t length, int fd); static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, jstring sourceDir, jlong dictOffset, jlong dictSize, - jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords) { + jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords, + jint maxPredictions) { PROF_OPEN; PROF_START(66); const char *sourceDirChars = env->GetStringUTFChars(sourceDir, 0); @@ -119,7 +120,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, #endif // USE_MMAP_FOR_DICTIONARY } else { dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier, - fullWordMultiplier, maxWordLength, maxWords); + fullWordMultiplier, maxWordLength, maxWords, maxPredictions); } PROF_END(66); PROF_CLOSE; @@ -128,51 +129,52 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jlong dict, jlong proximityInfo, jintArray xCoordinatesArray, jintArray yCoordinatesArray, - jintArray inputArray, jint arraySize, jintArray prevWordForBigrams, - jboolean useFullEditDistance, jcharArray outputArray, jintArray frequencyArray) { - Dictionary *dictionary = (Dictionary*)dict; + jintArray timesArray, jintArray pointerIdArray, jintArray inputArray, jint arraySize, + jint commitPoint, jboolean isGesture, + jintArray prevWordForBigrams, jboolean useFullEditDistance, jcharArray outputArray, + jintArray frequencyArray, jintArray spaceIndexArray, jintArray outputTypesArray) { + Dictionary *dictionary = (Dictionary*) dict; if (!dictionary) return 0; ProximityInfo *pInfo = (ProximityInfo*)proximityInfo; int *xCoordinates = env->GetIntArrayElements(xCoordinatesArray, 0); int *yCoordinates = 
env->GetIntArrayElements(yCoordinatesArray, 0); + int *times = env->GetIntArrayElements(timesArray, 0); + int *pointerIds = env->GetIntArrayElements(pointerIdArray, 0); int *frequencies = env->GetIntArrayElements(frequencyArray, 0); int *inputCodes = env->GetIntArrayElements(inputArray, 0); jchar *outputChars = env->GetCharArrayElements(outputArray, 0); + int *spaceIndices = env->GetIntArrayElements(spaceIndexArray, 0); + int *outputTypes = env->GetIntArrayElements(outputTypesArray, 0); jint *prevWordChars = prevWordForBigrams ? env->GetIntArrayElements(prevWordForBigrams, 0) : 0; jsize prevWordLength = prevWordChars ? env->GetArrayLength(prevWordForBigrams) : 0; - int count = dictionary->getSuggestions(pInfo, xCoordinates, yCoordinates, inputCodes, - arraySize, prevWordChars, prevWordLength, useFullEditDistance, - (unsigned short*) outputChars, frequencies); + + int count; + if (isGesture || arraySize > 1) { + count = dictionary->getSuggestions(pInfo, xCoordinates, yCoordinates, times, pointerIds, + inputCodes, arraySize, prevWordChars, prevWordLength, commitPoint, isGesture, + useFullEditDistance, (unsigned short*) outputChars, frequencies, spaceIndices, + outputTypes); + } else { + count = dictionary->getBigrams(prevWordChars, prevWordLength, inputCodes, + arraySize, (unsigned short*) outputChars, frequencies, outputTypes); + } + if (prevWordChars) { env->ReleaseIntArrayElements(prevWordForBigrams, prevWordChars, JNI_ABORT); } + env->ReleaseIntArrayElements(outputTypesArray, outputTypes, 0); + env->ReleaseIntArrayElements(spaceIndexArray, spaceIndices, 0); env->ReleaseCharArrayElements(outputArray, outputChars, 0); env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT); env->ReleaseIntArrayElements(frequencyArray, frequencies, 0); + env->ReleaseIntArrayElements(pointerIdArray, pointerIds, 0); + env->ReleaseIntArrayElements(timesArray, times, 0); env->ReleaseIntArrayElements(yCoordinatesArray, yCoordinates, 0); 
env->ReleaseIntArrayElements(xCoordinatesArray, xCoordinates, 0); return count; } -static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict, - jintArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, - jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) { - Dictionary *dictionary = (Dictionary*)dict; - if (!dictionary) return 0; - jint *prevWord = env->GetIntArrayElements(prevWordArray, 0); - int *inputCodes = env->GetIntArrayElements(inputArray, 0); - jchar *outputChars = env->GetCharArrayElements(outputArray, 0); - int *frequencies = env->GetIntArrayElements(frequencyArray, 0); - int count = dictionary->getBigrams(prevWord, prevWordLength, inputCodes, - inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams); - env->ReleaseIntArrayElements(frequencyArray, frequencies, 0); - env->ReleaseCharArrayElements(outputArray, outputChars, 0); - env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT); - env->ReleaseIntArrayElements(prevWordArray, prevWord, JNI_ABORT); - return count; -} - static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict, jintArray wordArray, jint wordLength) { Dictionary *dictionary = (Dictionary*)dict; @@ -233,7 +235,7 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong d delete dictionary; } -void releaseDictBuf(void* dictBuf, const size_t length, int fd) { +void releaseDictBuf(void *dictBuf, const size_t length, int fd) { #ifdef USE_MMAP_FOR_DICTIONARY int ret = munmap(dictBuf, length); if (ret != 0) { @@ -249,22 +251,20 @@ void releaseDictBuf(void* dictBuf, const size_t length, int fd) { } static JNINativeMethod sMethods[] = { - {"openNative", "(Ljava/lang/String;JJIIII)J", (void*)latinime_BinaryDictionary_open}, + {"openNative", "(Ljava/lang/String;JJIIIII)J", (void*)latinime_BinaryDictionary_open}, {"closeNative", "(J)V", 
(void*)latinime_BinaryDictionary_close}, - {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", - (void*)latinime_BinaryDictionary_getSuggestions}, + {"getSuggestionsNative", "(JJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I", + (void*) latinime_BinaryDictionary_getSuggestions}, {"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency}, {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram}, - {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)F", (void*)latinime_BinaryDictionary_calcNormalizedScore}, {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance} }; int register_BinaryDictionary(JNIEnv *env) { - const char* const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary"; + const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary"; return registerNativeMethods(env, kClassPathName, sMethods, sizeof(sMethods) / sizeof(sMethods[0])); } - } // namespace latinime diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.h b/native/jni/com_android_inputmethod_latin_BinaryDictionary.h index 1b1ba7f0f..0b67e6b95 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.h +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.h @@ -1,19 +1,19 @@ /* -** -** Copyright 2011, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ + * + * Copyright 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef _COM_ANDROID_INPUTMETHOD_LATIN_BINARYDICTIONARY_H #define _COM_ANDROID_INPUTMETHOD_LATIN_BINARYDICTIONARY_H @@ -24,6 +24,5 @@ namespace latinime { int register_BinaryDictionary(JNIEnv *env); -} - +} // namespace latinime #endif // _COM_ANDROID_INPUTMETHOD_LATIN_BINARYDICTIONARY_H diff --git a/native/jni/com_android_inputmethod_latin_NativeUtils.cpp b/native/jni/com_android_inputmethod_latin_NativeUtils.cpp new file mode 100644 index 000000000..32d895405 --- /dev/null +++ b/native/jni/com_android_inputmethod_latin_NativeUtils.cpp @@ -0,0 +1,39 @@ +/* + * + * Copyright 2012, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "com_android_inputmethod_latin_NativeUtils.h" +#include "jni.h" +#include "jni_common.h" + +#include <math.h> + +namespace latinime { + +static float latinime_NativeUtils_powf(float x, float y) { + return powf(x, y); +} + +static JNINativeMethod sMethods[] = { + {"powf", "(FF)F", (void*)latinime_NativeUtils_powf} +}; + +int register_NativeUtils(JNIEnv *env) { + const char *const kClassPathName = "com/android/inputmethod/latin/NativeUtils"; + return registerNativeMethods(env, kClassPathName, sMethods, + sizeof(sMethods) / sizeof(sMethods[0])); +} +} // namespace latinime diff --git a/native/jni/com_android_inputmethod_latin_NativeUtils.h b/native/jni/com_android_inputmethod_latin_NativeUtils.h new file mode 100644 index 000000000..4d29d7d47 --- /dev/null +++ b/native/jni/com_android_inputmethod_latin_NativeUtils.h @@ -0,0 +1,28 @@ +/* + * + * Copyright 2012, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _COM_ANDROID_INPUTMETHOD_LATIN_NATIVEUTILS_H +#define _COM_ANDROID_INPUTMETHOD_LATIN_NATIVEUTILS_H + +#include "jni.h" + +namespace latinime { + +int register_NativeUtils(JNIEnv *env); + +} // namespace latinime +#endif // _COM_ANDROID_INPUTMETHOD_LATIN_NATIVEUTILS_H diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp index b9e2c3255..8d7bce79e 100644 --- a/native/jni/jni_common.cpp +++ b/native/jni/jni_common.cpp @@ -1,24 +1,25 @@ /* -** -** Copyright 2011, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #define LOG_TAG "LatinIME: jni" #include "com_android_inputmethod_keyboard_ProximityInfo.h" #include "com_android_inputmethod_latin_BinaryDictionary.h" +#include "com_android_inputmethod_latin_NativeUtils.h" #include "defines.h" #include "jni.h" #include "proximity_info.h" @@ -32,8 +33,8 @@ using namespace latinime; /* * Returns the JNI version on success, -1 on failure. */ -jint JNI_OnLoad(JavaVM* vm, void* reserved) { - JNIEnv* env = 0; +jint JNI_OnLoad(JavaVM *vm, void *reserved) { + JNIEnv *env = 0; jint result = -1; if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) { @@ -52,6 +53,11 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) { goto bail; } + if (!register_NativeUtils(env)) { + AKLOGE("ERROR: NativeUtils native registration failed"); + goto bail; + } + /* success -- return valid version number */ result = JNI_VERSION_1_4; @@ -61,7 +67,7 @@ bail: namespace latinime { -int registerNativeMethods(JNIEnv* env, const char* className, JNINativeMethod* methods, +int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *methods, int numMethods) { jclass clazz = env->FindClass(className); if (clazz == 0) { @@ -76,5 +82,4 @@ int registerNativeMethods(JNIEnv* env, const char* className, JNINativeMethod* m env->DeleteLocalRef(clazz); return JNI_TRUE; } - } // namespace latinime diff --git a/native/jni/jni_common.h b/native/jni/jni_common.h index 6741443ac..32f9fa9b8 100644 --- a/native/jni/jni_common.h +++ b/native/jni/jni_common.h @@ -1,19 +1,19 @@ /* -** -** Copyright 2011, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #ifndef LATINIME_JNI_COMMON_H #define LATINIME_JNI_COMMON_H @@ -54,7 +54,5 @@ inline void safeReleaseFloatArrayElements(JNIEnv *env, jfloatArray jArray, jfloa env->ReleaseFloatArrayElements(jArray, cArray, 0); } } - } // namespace latinime - #endif // LATINIME_JNI_COMMON_H diff --git a/native/jni/src/additional_proximity_chars.cpp b/native/jni/src/additional_proximity_chars.cpp index 224f020f2..de8764678 100644 --- a/native/jni/src/additional_proximity_chars.cpp +++ b/native/jni/src/additional_proximity_chars.cpp @@ -38,4 +38,4 @@ const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_O[EN_US_ADDITIONAL_O_SI const int32_t AdditionalProximityChars::EN_US_ADDITIONAL_U[EN_US_ADDITIONAL_U_SIZE] = { 'a', 'e', 'i', 'o' }; -} +} // namespace latinime diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h index e0ecc0e1d..ba76cfced 100644 --- a/native/jni/src/additional_proximity_chars.h +++ b/native/jni/src/additional_proximity_chars.h @@ -26,6 +26,7 @@ namespace latinime { class AdditionalProximityChars { private: + DISALLOW_IMPLICIT_CONSTRUCTORS(AdditionalProximityChars); static const std::string LOCALE_EN_US; static const int EN_US_ADDITIONAL_A_SIZE = 4; static const int32_t EN_US_ADDITIONAL_A[]; @@ -44,7 +45,7 @@ class AdditionalProximityChars { } public: - static int getAdditionalCharsSize(const std::string* locale_str, const int32_t c) { + static int getAdditionalCharsSize(const std::string *locale_str, const int32_t c) { if (!isEnLocale(locale_str)) { return 0; } @@ -64,7 +65,7 @@ class AdditionalProximityChars { } } - static const int32_t* getAdditionalChars(const std::string *locale_str, const int32_t c) { + static const int32_t *getAdditionalChars(const std::string *locale_str, const int32_t c) { if (!isEnLocale(locale_str)) { return 0; } @@ -88,7 +89,5 @@ class AdditionalProximityChars { return getAdditionalCharsSize(locale_str, c) > 0; } }; - -} - +} // namespace latinime #endif // 
LATINIME_ADDITIONAL_PROXIMITY_CHARS_H diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp index 31f1e18a8..c91e5f741 100644 --- a/native/jni/src/basechars.cpp +++ b/native/jni/src/basechars.cpp @@ -18,7 +18,7 @@ namespace latinime { -/** +/* * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c * if c is not a combined character, or the base character if it @@ -187,8 +187,6 @@ const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, }; - // generated with: // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' - } // namespace latinime diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 9ef024dc4..4f5493ad1 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -1,19 +1,19 @@ /* -** -** Copyright 2010, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ + * + * Copyright 2010, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include <string.h> @@ -27,9 +27,8 @@ namespace latinime { -BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, - Dictionary *parentDictionary) - : DICT(dict), MAX_WORD_LENGTH(maxWordLength), mParentDictionary(parentDictionary) { +BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions) + : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_PREDICTIONS(maxPredictions) { if (DEBUG_DICT) { AKLOGI("BigramDictionary - constructor"); } @@ -38,7 +37,8 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, BigramDictionary::~BigramDictionary() { } -bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency) { +bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency, + int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const { word[length] = 0; if (DEBUG_DICT) { #ifdef FLAG_DBG @@ -50,25 +50,26 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ // Find the right insertion point int insertAt = 0; - while (insertAt < mMaxBigrams) { - if (frequency > mBigramFreq[insertAt] || (mBigramFreq[insertAt] == frequency - && length < Dictionary::wideStrLen(mBigramChars + insertAt * MAX_WORD_LENGTH))) { + while (insertAt < MAX_PREDICTIONS) { + if (frequency > 
bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency + && length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) { break; } insertAt++; } if (DEBUG_DICT) { - AKLOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams); + AKLOGI("Bigram: InsertAt -> %d MAX_PREDICTIONS: %d", insertAt, MAX_PREDICTIONS); } - if (insertAt < mMaxBigrams) { - memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]), - (char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]), - (mMaxBigrams - insertAt - 1) * sizeof(mBigramFreq[0])); - mBigramFreq[insertAt] = frequency; - memmove((char*) mBigramChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), - (char*) mBigramChars + (insertAt ) * MAX_WORD_LENGTH * sizeof(short), - (mMaxBigrams - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); - unsigned short *dest = mBigramChars + (insertAt ) * MAX_WORD_LENGTH; + if (insertAt < MAX_PREDICTIONS) { + memmove((char*) bigramFreq + (insertAt + 1) * sizeof(bigramFreq[0]), + (char*) bigramFreq + insertAt * sizeof(bigramFreq[0]), + (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0])); + bigramFreq[insertAt] = frequency; + outputTypes[insertAt] = Dictionary::KIND_PREDICTION; + memmove((char*) bigramChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), + (char*) bigramChars + (insertAt ) * MAX_WORD_LENGTH * sizeof(short), + (MAX_PREDICTIONS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); + unsigned short *dest = bigramChars + (insertAt ) * MAX_WORD_LENGTH; while (length--) { *dest++ = *word++; } @@ -84,12 +85,11 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ /* Parameters : * prevWord: the word before, the one for which we need to look up bigrams. * prevWordLength: its length. - * codes: what user typed, in the same format as for UnigramDictionary::getSuggestions. + * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions. * codesSize: the size of the codes array. 
* bigramChars: an array for output, at the same format as outwords for getSuggestions. * bigramFreq: an array to output frequencies. - * maxWordLength: the maximum size of a word. - * maxBigrams: the maximum number of bigrams fitting in the bigramChars array. + * outputTypes: an array to output types. * This method returns the number of bigrams this word has, for backward compatibility. * Note: this is not the number of bigrams output in the array, which is the number of * bigrams this word has WHOSE first letter also matches the letter the user typed. @@ -98,19 +98,21 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes, - int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength, - int maxBigrams) { +int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes, + int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name - mBigramFreq = bigramFreq; - mBigramChars = bigramChars; - mInputCodes = codes; - mMaxBigrams = maxBigrams; - const uint8_t* const root = DICT; - int pos = getBigramListPositionForWord(prevWord, prevWordLength); + const uint8_t *const root = DICT; + int pos = getBigramListPositionForWord(prevWord, prevWordLength, + false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams + if (0 == pos) { + // If no bigrams for this exact word, search again in lower case. 
+ pos = getBigramListPositionForWord(prevWord, prevWordLength, + true /* forceLowerCaseSearch */); + } + // If still no bigrams, we really don't have them! if (0 == pos) return 0; int bigramFlags; int bigramCount = 0; @@ -124,16 +126,17 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in bigramBuffer, &unigramFreq); // codesSize == 0 means we are trying to find bigram predictions. - if (codesSize < 1 || checkFirstCharacter(bigramBuffer)) { - const int bigramFreq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; + if (codesSize < 1 || checkFirstCharacter(bigramBuffer, inputCodes)) { + const int bigramFreqTemp = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags; // Due to space constraints, the frequency for bigrams is approximate - the lower the // unigram frequency, the worse the precision. The theoritical maximum error in // resulting frequency is 8 - although in the practice it's never bigger than 3 or 4 // in very bad cases. This means that sometimes, we'll see some bigrams interverted // here, but it can't get too bad. const int frequency = - BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreq); - if (addWordBigram(bigramBuffer, length, frequency)) { + BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp); + if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars, + outputTypes)) { ++bigramCount; } } @@ -144,10 +147,11 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in // Returns a pointer to the start of the bigram list. // If the word is not found or has no bigrams, this function returns 0. 
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, - const int prevWordLength) { + const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return 0; - const uint8_t* const root = DICT; - int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength); + const uint8_t *const root = DICT; + int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength, + forceLowerCaseSearch); if (NOT_VALID_WORD == pos) return 0; const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); @@ -164,10 +168,16 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, } void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, - const int prevWordLength, std::map<int, int> *map, uint8_t *filter) { + const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const { memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE); - const uint8_t* const root = DICT; - int pos = getBigramListPositionForWord(prevWord, prevWordLength); + const uint8_t *const root = DICT; + int pos = getBigramListPositionForWord(prevWord, prevWordLength, + false /* forceLowerCaseSearch */); + if (0 == pos) { + // If no bigrams for this exact string, search again in lower case. + pos = getBigramListPositionForWord(prevWord, prevWordLength, + true /* forceLowerCaseSearch */); + } if (0 == pos) return; int bigramFlags; @@ -181,11 +191,10 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } -bool BigramDictionary::checkFirstCharacter(unsigned short *word) { +bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. 
- int *inputCodes = mInputCodes; int maxAlt = MAX_ALTERNATIVES; const unsigned short firstBaseChar = toBaseLowerCase(*word); while (maxAlt > 0) { @@ -199,12 +208,13 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) { } bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, - int length2) { - const uint8_t* const root = DICT; - int pos = getBigramListPositionForWord(word1, length1); + int length2) const { + const uint8_t *const root = DICT; + int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; - int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2); + int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2, + false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == nextWordPos) return false; int bigramFlags; do { diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index b8763a515..f66e27b8a 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -27,36 +27,31 @@ namespace latinime { class Dictionary; class BigramDictionary { public: - BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary); - int getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams); - int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength); + BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); + int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, + unsigned short *outWords, int *frequencies, int *outputTypes) const; + int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, + const bool forceLowerCaseSearch) const; void 
fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, - std::map<int, int> *map, uint8_t *filter); - bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); + std::map<int, int> *map, uint8_t *filter) const; + bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; ~BigramDictionary(); private: - bool addWordBigram(unsigned short *word, int length, int frequency); + DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); + bool addWordBigram(unsigned short *word, int length, int frequency, + int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const; int getBigramAddress(int *pos, bool advance); int getBigramFreq(int *pos); void searchForTerminalNode(int addressLookingFor, int frequency); bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } - bool checkFirstCharacter(unsigned short *word); + bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; const unsigned char *DICT; const int MAX_WORD_LENGTH; + const int MAX_PREDICTIONS; // TODO: Re-implement proximity correction for bigram correction static const int MAX_ALTERNATIVES = 1; - - Dictionary *mParentDictionary; - int *mBigramFreq; - int mMaxBigrams; - unsigned short *mBigramChars; - int *mInputCodes; - int mInputLength; }; - } // namespace latinime - #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 51bf8ebbc..4155ef401 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -19,12 +19,14 @@ #include <limits> #include "bloom_filter.h" +#include "char_utils.h" #include "unigram_dictionary.h" namespace latinime { class BinaryFormat { private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; const static int32_t CHARACTER_ARRAY_TERMINATOR = 
0x1F; const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; @@ -44,29 +46,29 @@ class BinaryFormat { const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; const static int SHORTCUT_LIST_SIZE_SIZE = 2; - static int detectFormat(const uint8_t* const dict); - static unsigned int getHeaderSize(const uint8_t* const dict); - static unsigned int getFlags(const uint8_t* const dict); - static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos); - static uint8_t getFlagsAndForwardPointer(const uint8_t* const dict, int* pos); - static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos); - static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos); - static int skipOtherCharacters(const uint8_t* const dict, const int pos); + static int detectFormat(const uint8_t *const dict); + static unsigned int getHeaderSize(const uint8_t *const dict); + static unsigned int getFlags(const uint8_t *const dict); + static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); + static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); + static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos); + static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); + static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipFrequency(const uint8_t flags, const int pos); - static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos); - static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos); - static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos); - static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags, + static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos); + static int 
skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); + static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); + static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); - static int readChildrenPosition(const uint8_t* const dict, const uint8_t flags, const int pos); + static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); static bool hasChildrenInFlags(const uint8_t flags); - static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags, + static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos); - static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord, - const int length); - static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth, - uint16_t* outWord, int* outUnigramFrequency); + static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, + const int length, const bool forceLowerCaseSearch); + static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, + uint16_t *outWord, int *outUnigramFrequency); static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq); @@ -81,7 +83,7 @@ class BinaryFormat { const static unsigned int NO_FLAGS = 0; }; -inline int BinaryFormat::detectFormat(const uint8_t* const dict) { +inline int BinaryFormat::detectFormat(const uint8_t *const dict) { // The magic number is stored big-endian. 
const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3]; switch (magicNumber) { @@ -103,7 +105,7 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) { } } -inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) { +inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) { switch (detectFormat(dict)) { case 1: return NO_FLAGS; @@ -112,7 +114,7 @@ inline unsigned int BinaryFormat::getFlags(const uint8_t* const dict) { } } -inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) { +inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) { switch (detectFormat(dict)) { case 1: return FORMAT_VERSION_1_HEADER_SIZE; @@ -124,17 +126,17 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t* const dict) { } } -inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) { +inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) { const int msb = dict[(*pos)++]; if (msb < 0x80) return msb; return ((msb & 0x7F) << 8) | dict[(*pos)++]; } -inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) { +inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) { return dict[(*pos)++]; } -inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos) { +inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) { const int origin = *pos; const int32_t character = dict[origin]; if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { @@ -153,12 +155,12 @@ inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t* const d } } -inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t* const dict, +inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos) { return dict[pos]; } -inline int 
BinaryFormat::skipOtherCharacters(const uint8_t* const dict, const int pos) { +inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) { int currentPos = pos; int32_t character = dict[currentPos++]; while (CHARACTER_ARRAY_TERMINATOR != character) { @@ -184,7 +186,7 @@ static inline int attributeAddressSize(const uint8_t flags) { */ } -static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) { +static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) { int currentPos = pos; uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, ¤tPos); while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) { @@ -201,7 +203,7 @@ static inline int childrenAddressSize(const uint8_t flags) { /* See the note in attributeAddressSize. The same applies here */ } -static inline int shortcutByteSize(const uint8_t* const dict, const int pos) { +static inline int shortcutByteSize(const uint8_t *const dict, const int pos) { return ((int)(dict[pos] << 8)) + (dict[pos + 1]); } @@ -213,7 +215,7 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { return UnigramDictionary::FLAG_IS_TERMINAL & flags ? 
pos + 1 : pos; } -inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos) { if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) { return pos + shortcutByteSize(dict, pos); @@ -222,7 +224,7 @@ inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t } } -inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos) { if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) { return skipExistingBigrams(dict, pos); @@ -231,7 +233,7 @@ inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t fl } } -inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. 
int newPos = pos; @@ -240,7 +242,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint return newPos; } -inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict, +inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) { int currentPos = pos; currentPos = skipChildrenPosition(flags, currentPos); @@ -248,7 +250,7 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict, return currentPos; } -inline int BinaryFormat::readChildrenPosition(const uint8_t* const dict, const uint8_t flags, +inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos) { int offset = 0; switch (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags) { @@ -277,7 +279,7 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) { != (UnigramDictionary::MASK_GROUP_ADDRESS_TYPE & flags)); } -inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* const dict, +inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos) { int offset = 0; const int origin = *pos; @@ -307,8 +309,8 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. -inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, - const int32_t* const inWord, const int length) { +inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, + const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) { int pos = 0; int wordPos = 0; @@ -317,7 +319,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, // there was no match (or we would have found it). 
if (wordPos > length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); - const int32_t wChar = inWord[wordPos]; + const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // find a matching character for this depth, therefore there is no match. @@ -394,8 +396,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, * outUnigramFrequency: a pointer to an int to write the frequency into. * Return value : the length of the word, of 0 if the word was not found. */ -inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int address, - const int maxDepth, uint16_t* outWord, int* outUnigramFrequency) { +inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, + const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) { int pos = 0; int wordPos = 0; @@ -555,7 +557,5 @@ inline int BinaryFormat::getProbability(const int position, const std::map<int, return backoff(unigramFreq); } } - } // namespace latinime - #endif // LATINIME_BINARY_FORMAT_H diff --git a/native/jni/src/bloom_filter.h b/native/jni/src/bloom_filter.h index 7ae6a1fa4..47177dcba 100644 --- a/native/jni/src/bloom_filter.h +++ b/native/jni/src/bloom_filter.h @@ -32,7 +32,5 @@ static inline bool isInFilter(const uint8_t *filter, const int position) { const unsigned int bucket = position % BIGRAM_FILTER_MODULO; return filter[bucket >> 3] & (1 << (bucket & 0x7)); } - } // namespace latinime - #endif // LATINIME_BLOOM_FILTER_H diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp index a31a0632c..9d2bc85b3 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/char_utils.cpp @@ -895,5 +895,4 @@ unsigned short latin_tolower(unsigned short c) { compare_pair_capital); return p ? 
p->small : c; } - } // namespace latinime diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index 607dc5195..edd96bbb0 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -50,8 +50,7 @@ inline static unsigned short toBaseChar(unsigned short c) { return c; } -inline static unsigned short toBaseLowerCase(unsigned short c) { - c = toBaseChar(c); +inline static unsigned short toLowerCase(const unsigned short c) { if (isAsciiUpper(c)) { return toAsciiLower(c); } else if (isAscii(c)) { @@ -60,6 +59,8 @@ inline static unsigned short toBaseLowerCase(unsigned short c) { return latin_tolower(c); } +inline static unsigned short toBaseLowerCase(const unsigned short c) { + return toLowerCase(toBaseChar(c)); +} } // namespace latinime - #endif // LATINIME_CHAR_UTILS_H diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 99f5b92c1..74e256fe5 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -27,6 +27,7 @@ #include "defines.h" #include "dictionary.h" #include "proximity_info.h" +#include "proximity_info_state.h" namespace latinime { @@ -97,7 +98,7 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD static const char QUOTE = '\''; inline bool Correction::isQuote(const unsigned short c) { - const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex); + const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); return (c == QUOTE && userTypedChar != QUOTE); } @@ -105,11 +106,6 @@ inline bool Correction::isQuote(const unsigned short c) { // Correction // //////////////// -Correction::Correction(const int typedLetterMultiplier, const int fullWordMultiplier) - : TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) { - initEditDistance(mEditDistanceTable); -} - void Correction::resetCorrection() { mTotalTraverseCount = 0; } @@ -282,7 +278,7 @@ bool 
Correction::needsToPrune() const { void Correction::addCharToCurrentWord(const int32_t c) { mWord[mOutputIndex] = c; - const unsigned short *primaryInputWord = mProximityInfo->getPrimaryInputWord(); + const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputLength, mWord, mOutputIndex + 1); } @@ -308,13 +304,12 @@ Correction::CorrectionType Correction::processUnrelatedCorrectionType() { return UNRELATED; } -inline bool isEquivalentChar(ProximityInfo::ProximityType type) { - return type == ProximityInfo::EQUIVALENT_CHAR; +inline bool isEquivalentChar(ProximityType type) { + return type == EQUIVALENT_CHAR; } -inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) { - return type == ProximityInfo::EQUIVALENT_CHAR - || type == ProximityInfo::NEAR_PROXIMITY_CHAR; +inline bool isProximityCharOrEquivalentChar(ProximityType type) { + return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; } Correction::CorrectionType Correction::processCharAndCalcState( @@ -335,19 +330,19 @@ Correction::CorrectionType Correction::processCharAndCalcState( bool incremented = false; if (mLastCharExceeded && mInputIndex == mInputLength - 1) { // TODO: Do not check the proximity if EditDistance exceeds the threshold - const ProximityInfo::ProximityType matchId = - mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex); + const ProximityType matchId = mProximityInfoState.getMatchedProximityId( + mInputIndex, c, true, &proximityIndex); if (isEquivalentChar(matchId)) { mLastCharExceeded = false; --mExcessiveCount; mDistances[mOutputIndex] = - mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0); - } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) { + mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); + } else if (matchId == NEAR_PROXIMITY_CHAR) { mLastCharExceeded = false; --mExcessiveCount; ++mProximityCount; - 
mDistances[mOutputIndex] = - mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); + mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance( + mInputIndex, proximityIndex); } if (!isQuote(c)) { incrementInputIndex(); @@ -388,7 +383,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( bool secondTransposing = false; if (mTransposedCount % 2 == 1) { - if (isEquivalentChar(mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { + if (isEquivalentChar(mProximityInfoState.getMatchedProximityId( + mInputIndex - 1, c, false))) { ++mTransposedCount; secondTransposing = true; } else if (mCorrectionStates[mOutputIndex].mExceeding) { @@ -417,17 +413,17 @@ Correction::CorrectionType Correction::processCharAndCalcState( ? (noCorrectionsHappenedSoFar || mProximityCount == 0) : (noCorrectionsHappenedSoFar && mProximityCount == 0); - ProximityInfo::ProximityType matchedProximityCharId = secondTransposing - ? ProximityInfo::EQUIVALENT_CHAR - : mProximityInfo->getMatchedProximityId( + ProximityType matchedProximityCharId = secondTransposing + ? 
EQUIVALENT_CHAR + : mProximityInfoState.getMatchedProximityId( mInputIndex, c, checkProximityChars, &proximityIndex); - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId - || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (UNRELATED_CHAR == matchedProximityCharId + || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { if (canTryCorrection && mOutputIndex > 0 && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mExceeding - && isEquivalentChar(mProximityInfo->getMatchedProximityId( + && isEquivalentChar(mProximityInfoState.getMatchedProximityId( mInputIndex, mWord[mOutputIndex - 1], false))) { if (DEBUG_CORRECTION && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) @@ -446,14 +442,14 @@ Correction::CorrectionType Correction::processCharAndCalcState( // Here, we are doing something equivalent to matchedProximityCharId, // but we already know that "excessive char correction" just happened // so that we just need to check "mProximityCount == 0". 
- matchedProximityCharId = mProximityInfo->getMatchedProximityId( + matchedProximityCharId = mProximityInfoState.getMatchedProximityId( mInputIndex, c, mProximityCount == 0, &proximityIndex); } } - if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId - || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { - if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (UNRELATED_CHAR == matchedProximityCharId + || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { mAdditionalProximityMatching = true; } // TODO: Optimize @@ -463,10 +459,10 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (mInputIndex < mInputLength - 1 && mOutputIndex > 0 && mTransposedCount > 0 && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing - && isEquivalentChar(mProximityInfo->getMatchedProximityId( + && isEquivalentChar(mProximityInfoState.getMatchedProximityId( mInputIndex, mWord[mOutputIndex - 1], false)) && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // Conversion t->e // Example: // occaisional -> occa sional @@ -478,7 +474,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && !mCorrectionStates[mOutputIndex].mTransposing && mCorrectionStates[mOutputIndex - 1].mTransposing && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { // Conversion t->s // Example: // chcolate -> chocolate @@ -490,7 +486,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && mCorrectionStates[mOutputIndex].mProximityMatching && mCorrectionStates[mOutputIndex].mSkipping && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex - 1, c, false))) { + 
mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { // Conversion p->s // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of // proximity chars of "s", but it should rather be handled as a skipped char. @@ -502,7 +498,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( && mCorrectionStates[mOutputIndex].mSkipping && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching && isProximityCharOrEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // Conversion s->a incrementInputIndex(); --mSkippedCount; @@ -511,7 +507,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength && isEquivalentChar( - mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { // 1.2. 
Excessive or transpose correction if (mTransposing) { ++mTransposedCount; @@ -543,7 +539,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( mTransposedCount, mExcessiveCount, c); } return processSkipChar(c, isTerminal, false); - } else if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { // As a last resort, use additional proximity characters mProximityMatching = true; ++mProximityCount; @@ -573,12 +569,12 @@ Correction::CorrectionType Correction::processCharAndCalcState( } else if (isEquivalentChar(matchedProximityCharId)) { mMatching = true; ++mEquivalentCharCount; - mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0); - } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) { + mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); + } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) { mProximityMatching = true; ++mProximityCount; mDistances[mOutputIndex] = - mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex); + mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex); if (DEBUG_CORRECTION && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputLength) && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 @@ -637,7 +633,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( Correction::~Correction() { } -inline static int getQuoteCount(const unsigned short* word, const int length) { +inline static int getQuoteCount(const unsigned short *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { if(word[i] == '\'') { @@ -657,12 +653,12 @@ inline static bool isUpperCase(unsigned short c) { /* static */ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, - const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction, + const int 
outputIndex, const int freq, int *editDistanceTable, const Correction *correction, const int inputLength) { const int excessivePos = correction->getExcessivePos(); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; - const ProximityInfo *proximityInfo = correction->mProximityInfo; + const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState; const int skippedCount = correction->mSkippedCount; const int transposedCount = correction->mTransposedCount / 2; const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; @@ -681,11 +677,11 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // TODO: use mExcessiveCount const int matchCount = inputLength - correction->mProximityCount - excessiveCount; - const unsigned short* word = correction->mWord; + const unsigned short *word = correction->mWord; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) - - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); + - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputLength)); // TODO: Calculate edit distance for transposed and excessive int ed = 0; @@ -737,8 +733,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex multiplyIntCapped(matchWeight, &finalFreq); } - if (proximityInfo->getMatchedProximityId(0, word[0], true) - == ProximityInfo::UNRELATED_CHAR) { + if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) { multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); } @@ -764,7 +759,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // Demotion for a word with excessive character if (excessiveCount > 0) { multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); - if (!lastCharExceeded && 
!proximityInfo->existsAdjacentProximityChars(excessivePos)) { + if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) { if (DEBUG_DICT_FULL) { AKLOGI("Double excessive demotion"); } @@ -775,8 +770,9 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex } const bool performTouchPositionCorrection = - CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled() - && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; + CALIBRATE_SCORE_BY_TOUCH_COORDINATES + && proximityInfoState->touchPositionCorrectionEnabled() + && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; // Score calibration by touch coordinates is being done only for pure-fat finger typing error // cases. int additionalProximityCount = 0; @@ -796,7 +792,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; static const float R2 = HALF_SCORE_SQUARED_RADIUS; const float x = (float)squaredDistance - / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; + / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; const float factor = max((x < R1) ? 
(A * (R1 - x) + B * x) / R1 : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); @@ -907,7 +903,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex if (DEBUG_CORRECTION_FREQ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { - DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength); + DUMP_WORD(correction->getPrimaryInputWord(), inputLength); DUMP_WORD(correction->mWord, outputLength); AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, skippedCount, transposedCount, excessiveCount, additionalProximityCount, @@ -920,7 +916,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex /* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, - const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { + const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; @@ -1050,10 +1046,10 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( /* Damerau-Levenshtein distance */ inline static int editDistanceInternal( - int* editDistanceTable, const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength) { + int *editDistanceTable, const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] - int* dp = editDistanceTable; + int *dp = editDistanceTable; const int li = beforeLength + 1; const int lo = afterLength + 1; for (int i = 0; i < li; ++i) { @@ -1089,8 +1085,8 @@ inline static int editDistanceInternal( return dp[li * lo - 1]; } -int Correction::RankingAlgorithm::editDistance(const unsigned short* before, - const int beforeLength, const 
unsigned short* after, const int afterLength) { +int Correction::RankingAlgorithm::editDistance(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); } @@ -1118,8 +1114,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short* before, // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. /* static */ -float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength, +float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength, const int score) { if (0 == beforeLength || 0 == afterLength) { return 0; @@ -1146,5 +1142,4 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* be const float weight = 1.0 - (float) distance / afterLength; return (score / maxScore) * weight; } - } // namespace latinime diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 3300a8491..13f951af5 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -19,9 +19,10 @@ #include <assert.h> #include <stdint.h> -#include "correction_state.h" +#include "correction_state.h" #include "defines.h" +#include "proximity_info_state.h" namespace latinime { @@ -93,7 +94,7 @@ class Correction { } } - Correction(const int typedLetterMultiplier, const int fullWordMultiplier); + Correction() {}; void resetCorrection(); void initCorrection( const ProximityInfo *pi, const int inputLength, const int maxWordLength); @@ -137,9 +138,9 @@ class Correction { int getFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word); - 
int getFinalProbability(const int probability, unsigned short **word, int* wordLength); + int getFinalProbability(const int probability, unsigned short **word, int *wordLength); int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int* wordLength, const int inputLength); + int *wordLength, const int inputLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); @@ -162,23 +163,37 @@ class Correction { class RankingAlgorithm { public: static int calculateFinalProbability(const int inputIndex, const int depth, - const int probability, int *editDistanceTable, const Correction* correction, + const int probability, int *editDistanceTable, const Correction *correction, const int inputLength); static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const Correction* correction, const bool isSpaceProximity, + const int wordCount, const Correction *correction, const bool isSpaceProximity, const unsigned short *word); - static float calcNormalizedScore(const unsigned short* before, const int beforeLength, - const unsigned short* after, const int afterLength, const int score); - static int editDistance(const unsigned short* before, - const int beforeLength, const unsigned short* after, const int afterLength); + static float calcNormalizedScore(const unsigned short *before, const int beforeLength, + const unsigned short *after, const int afterLength, const int score); + static int editDistance(const unsigned short *before, + const int beforeLength, const unsigned short *after, const int afterLength); private: static const int CODE_SPACE = ' '; static const int MAX_INITIAL_SCORE = 255; - static const int TYPED_LETTER_MULTIPLIER = 2; - static const int FULL_WORD_MULTIPLIER = 2; }; + // proximity info state + void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes, + const int inputLength, const int *xCoordinates, const int 
*yCoordinates) { + mProximityInfoState.initInputParams( + proximityInfo, inputCodes, inputLength, xCoordinates, yCoordinates); + } + + const unsigned short *getPrimaryInputWord() const { + return mProximityInfoState.getPrimaryInputWord(); + } + + unsigned short getPrimaryCharAt(const int index) const { + return mProximityInfoState.getPrimaryCharAt(index); + } + private: + DISALLOW_COPY_AND_ASSIGN(Correction); inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); @@ -188,10 +203,10 @@ class Correction { inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline int getFinalProbabilityInternal(const int probability, unsigned short **word, - int* wordLength, const int inputLength); + int *wordLength, const int inputLength); - const int TYPED_LETTER_MULTIPLIER; - const int FULL_WORD_MULTIPLIER; + static const int TYPED_LETTER_MULTIPLIER = 2; + static const int FULL_WORD_MULTIPLIER = 2; const ProximityInfo *mProximityInfo; bool mUseFullEditDistance; @@ -240,7 +255,7 @@ class Correction { bool mExceeding; bool mTransposing; bool mSkipping; - + ProximityInfoState mProximityInfoState; }; } // namespace latinime #endif // LATINIME_CORRECTION_H diff --git a/native/jni/src/correction_state.h b/native/jni/src/correction_state.h index 5b2cbd3a2..a63d4aa94 100644 --- a/native/jni/src/correction_state.h +++ b/native/jni/src/correction_state.h @@ -79,6 +79,5 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos state->mSkipping = false; state->mAdditionalProximityMatching = false; } - } // namespace latinime #endif // LATINIME_CORRECTION_STATE_H diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h index 376ba59d9..2fee6e83f 100644 --- a/native/jni/src/debug.h +++ b/native/jni/src/debug.h @@ -1,26 +1,26 @@ /* -** -** Copyright 2011, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); 
-** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2011, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #ifndef LATINIME_DEBUG_H #define LATINIME_DEBUG_H #include "defines.h" -static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output, +static inline unsigned char *convertToUnibyteString(unsigned short *input, unsigned char *output, const unsigned int length) { unsigned int i = 0; for (; i <= length && input[i] != 0; ++i) @@ -29,8 +29,8 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig return output; } -static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input, - unsigned char* output, const unsigned int length, unsigned char c) { +static inline unsigned char *convertToUnibyteStringAndReplaceLastChar(unsigned short *input, + unsigned char *output, const unsigned int length, unsigned char c) { unsigned int i = 0; for (; i <= length && input[i] != 0; ++i) output[i] = input[i] & 0xFF; @@ -39,7 +39,7 @@ static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned s return output; } -static inline void LOGI_S16(unsigned short* string, const unsigned int length) { +static inline void LOGI_S16(unsigned short *string, const unsigned int length) { unsigned char tmp_buffer[length]; convertToUnibyteString(string, tmp_buffer, length); AKLOGI(">> %s", tmp_buffer); @@ -49,7 +49,7 @@ static inline void LOGI_S16(unsigned short* string, const unsigned int length) { // usleep(10); } -static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int length, +static inline void LOGI_S16_PLUS(unsigned short *string, const unsigned int length, unsigned char c) { unsigned char tmp_buffer[length+1]; convertToUnibyteStringAndReplaceLastChar(string, tmp_buffer, length, c); @@ -58,7 +58,7 @@ static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int leng // usleep(10); } -static inline void printDebug(const char* tag, int* codes, int codesSize, int MAX_PROXIMITY_CHARS) { +static inline void printDebug(const char *tag, int *codes, int 
codesSize, int MAX_PROXIMITY_CHARS) { unsigned char *buf = (unsigned char*)malloc((1 + codesSize) * sizeof(*buf)); buf[codesSize] = 0; @@ -68,5 +68,4 @@ static inline void printDebug(const char* tag, int* codes, int codesSize, int MA free(buf); } - #endif // LATINIME_DEBUG_H diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index cd2fc634a..05b997d59 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -1,32 +1,35 @@ /* -** -** Copyright 2010, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2010, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef LATINIME_DEFINES_H #define LATINIME_DEFINES_H #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) -#include <cutils/log.h> -#define AKLOGE ALOGE -#define AKLOGI ALOGI +#include <android/log.h> +#ifndef LOG_TAG +#define LOG_TAG "LatinIME: " +#endif +#define AKLOGE(fmt, ...) 
__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, fmt, ##__VA_ARGS__) +#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) #define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0) -static inline void dumpWord(const unsigned short* word, const int length) { +static inline void dumpWord(const unsigned short *word, const int length) { static char charBuf[50]; for (int i = 0; i < length; ++i) { @@ -36,7 +39,7 @@ static inline void dumpWord(const unsigned short* word, const int length) { AKLOGI("[ %s ]", charBuf); } -static inline void dumpWordInt(const int* word, const int length) { +static inline void dumpWordInt(const int *word, const int length) { static char charBuf[50]; for (int i = 0; i < length; ++i) { @@ -116,10 +119,6 @@ static inline void prof_out(void) { #endif // FLAG_DO_PROFILE #ifdef FLAG_DBG -#include <cutils/log.h> -#ifndef LOG_TAG -#define LOG_TAG "LatinIME: " -#endif #define DEBUG_DICT true #define DEBUG_DICT_FULL false #define DEBUG_EDIT_DISTANCE false @@ -146,7 +145,6 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false - #endif // FLAG_DBG #ifndef U_SHORT_MAX @@ -225,6 +223,9 @@ static inline void prof_out(void) { // This is only used for the size of array. Not to be used in c functions. #define MAX_WORD_LENGTH_INTERNAL 48 +// This must be the same as ProximityInfo#MAX_PROXIMITY_CHARS_SIZE, currently it's 16. 
+#define MAX_PROXIMITY_CHARS_SIZE_INTERNAL 16 + // This must be equal to ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE in KeyDetector.java #define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2 @@ -252,6 +253,8 @@ static inline void prof_out(void) { #define FIRST_WORD_INDEX 0 +#define MAX_SPACES_INTERNAL 16 + // TODO: Reduce this constant if possible; check the maximum number of digraphs in the same // word in the dictionary for languages with digraphs, like German and French #define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5 @@ -289,4 +292,23 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; } #define INPUTLENGTH_FOR_DEBUG -1 #define MIN_OUTPUT_INDEX_FOR_DEBUG -1 +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName(); \ + DISALLOW_COPY_AND_ASSIGN(TypeName) + +// Used as a return value for character comparison +typedef enum { + // Same char, possibly with different case or accent + EQUIVALENT_CHAR, + // It is a char located nearby on the keyboard + NEAR_PROXIMITY_CHAR, + // It is an unrelated char + UNRELATED_CHAR, + // Additional proximity char which can differ by language. + ADDITIONAL_PROXIMITY_CHAR +} ProximityType; #endif // LATINIME_DEFINES_H diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 1fb02478b..4703a38e8 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -1,19 +1,19 @@ /* -** -** Copyright 2009, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright (C) 2009, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #include <stdio.h> @@ -22,13 +22,14 @@ #include "binary_format.h" #include "defines.h" #include "dictionary.h" +#include "gesture_decoder_wrapper.h" namespace latinime { // TODO: Change the type of all keyCodes to uint32_t Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultiplier, int fullWordMultiplier, - int maxWordLength, int maxWords) + int maxWordLength, int maxWords, int maxPredictions) : mDict((unsigned char*) dict), mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust) { if (DEBUG_DICT) { @@ -38,30 +39,28 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); } } - mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); - mWordsPriorityQueuePool = new WordsPriorityQueuePool( - maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength); const unsigned int headerSize = BinaryFormat::getHeaderSize(mDict); const unsigned int options = BinaryFormat::getFlags(mDict); mUnigramDictionary = new UnigramDictionary(mDict + headerSize, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, options); - mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength, this); + mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength, maxPredictions); + mGestureDecoder = new GestureDecoderWrapper(maxWordLength, maxWords); + mGestureDecoder->setDict(mUnigramDictionary, mBigramDictionary, + mDict + headerSize /* dict root */, 0 /* root pos */); } Dictionary::~Dictionary() { - delete mCorrection; - delete mWordsPriorityQueuePool; delete mUnigramDictionary; delete mBigramDictionary; + delete mGestureDecoder; } -int Dictionary::getFrequency(const int32_t *word, int length) { +int Dictionary::getFrequency(const int32_t *word, int length) const { return mUnigramDictionary->getFrequency(word, length); } bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t 
*word2, - int length2) { + int length2) const { return mBigramDictionary->isValidBigram(word1, length1, word2, length2); } - } // namespace latinime diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 9f2367904..f625813a6 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -21,8 +21,8 @@ #include "bigram_dictionary.h" #include "char_utils.h" -#include "correction.h" #include "defines.h" +#include "incremental_decoder_interface.h" #include "proximity_info.h" #include "unigram_dictionary.h" #include "words_priority_queue_pool.h" @@ -31,33 +31,57 @@ namespace latinime { class Dictionary { public: + // Taken from SuggestedWords.java + const static int KIND_TYPED = 0; // What user typed + const static int KIND_CORRECTION = 1; // Simple correction/suggestion + const static int KIND_COMPLETION = 2; // Completion (suggestion with appended chars) + const static int KIND_WHITELIST = 3; // Whitelisted word + const static int KIND_BLACKLIST = 4; // Blacklisted word + const static int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. 
punctuation + const static int KIND_APP_DEFINED = 6; // Suggested by the application + const static int KIND_SHORTCUT = 7; // A shortcut + const static int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input) + Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler, - int fullWordMultiplier, int maxWordLength, int maxWords); + int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions); int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, - int *codes, int codesSize, const int32_t* prevWordChars, const int prevWordLength, - bool useFullEditDistance, unsigned short *outWords, int *frequencies) { - std::map<int, int> bigramMap; - uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; - mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars, - prevWordLength, &bigramMap, bigramFilter); - return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, - mCorrection, xcoordinates, ycoordinates, codes, codesSize, &bigramMap, - bigramFilter, useFullEditDistance, outWords, frequencies); + int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, + int prevWordLength, int commitPoint, bool isGesture, + bool useFullEditDistance, unsigned short *outWords, + int *frequencies, int *spaceIndices, int *outputTypes) { + int result = 0; + if (isGesture) { + mGestureDecoder->setPrevWord(prevWordChars, prevWordLength); + result = mGestureDecoder->getSuggestions(proximityInfo, xcoordinates, ycoordinates, + times, pointerIds, codes, codesSize, commitPoint, + outWords, frequencies, spaceIndices, outputTypes); + return result; + } else { + std::map<int, int> bigramMap; + uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; + mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars, + prevWordLength, &bigramMap, bigramFilter); + result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, + ycoordinates, codes, 
codesSize, &bigramMap, bigramFilter, + useFullEditDistance, outWords, frequencies, outputTypes); + return result; + } } int getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) { + unsigned short *outWords, int *frequencies, int *outputTypes) const { + if (length <= 0) return 0; return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, - maxWordLength, maxBigrams); + outputTypes); } - int getFrequency(const int32_t *word, int length); - bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); - void *getDict() { return (void *)mDict; } - int getDictSize() { return mDictSize; } - int getMmapFd() { return mMmapFd; } - int getDictBufAdjust() { return mDictBufAdjust; } + int getFrequency(const int32_t *word, int length) const; + bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; + void *getDict() const { return (void *)mDict; } + int getDictSize() const { return mDictSize; } + int getMmapFd() const { return mMmapFd; } + int getDictBufAdjust() const { return mDictBufAdjust; } ~Dictionary(); // public static utility methods @@ -65,6 +89,7 @@ class Dictionary { static int wideStrLen(unsigned short *str); private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); const unsigned char *mDict; // Used only for the mmap version of dictionary loading, but we use these as dummy variables @@ -73,10 +98,9 @@ class Dictionary { const int mMmapFd; const int mDictBufAdjust; - UnigramDictionary *mUnigramDictionary; - BigramDictionary *mBigramDictionary; - WordsPriorityQueuePool *mWordsPriorityQueuePool; - Correction *mCorrection; + const UnigramDictionary *mUnigramDictionary; + const BigramDictionary *mBigramDictionary; + IncrementalDecoderInterface *mGestureDecoder; }; // public static utility methods @@ -89,5 +113,4 @@ inline int Dictionary::wideStrLen(unsigned short *str) { 
return end - str; } } // namespace latinime - #endif // LATINIME_DICTIONARY_H diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.cpp b/native/jni/src/gesture/gesture_decoder_wrapper.cpp new file mode 100644 index 000000000..afbe0c5c3 --- /dev/null +++ b/native/jni/src/gesture/gesture_decoder_wrapper.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gesture_decoder_wrapper.h" + +namespace latinime { + IncrementalDecoderInterface * + (*GestureDecoderWrapper::sGestureDecoderFactoryMethod)(int, int) = 0; +} // namespace latinime diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h new file mode 100644 index 000000000..03c84b5fd --- /dev/null +++ b/native/jni/src/gesture/gesture_decoder_wrapper.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_GESTURE_DECODER_WRAPPER_H +#define LATINIME_GESTURE_DECODER_WRAPPER_H + +#include <stdint.h> +#include "defines.h" +#include "incremental_decoder_interface.h" + +namespace latinime { + +class UnigramDictionary; +class BigramDictionary; +class ProximityInfo; + +class GestureDecoderWrapper : public IncrementalDecoderInterface { + public: + GestureDecoderWrapper(const int maxWordLength, const int maxWords) { + mIncrementalDecoderInterface = getGestureDecoderInstance(maxWordLength, maxWords); + } + + virtual ~GestureDecoderWrapper() { + delete mIncrementalDecoderInterface; + } + + int getSuggestions(ProximityInfo *pInfo, int *inputXs, int *inputYs, int *times, + int *pointerIds, int *codes, int inputSize, int commitPoint, + unsigned short *outWords, int *frequencies, int *outputIndices, int *outputTypes) { + if (!mIncrementalDecoderInterface) { + return 0; + } + return mIncrementalDecoderInterface->getSuggestions( + pInfo, inputXs, inputYs, times, pointerIds, codes, inputSize, commitPoint, + outWords, frequencies, outputIndices, outputTypes); + } + + void reset() { + if (!mIncrementalDecoderInterface) { + return; + } + mIncrementalDecoderInterface->reset(); + } + + void setDict(const UnigramDictionary *dict, const BigramDictionary *bigram, + const uint8_t *dictRoot, int rootPos) { + if (!mIncrementalDecoderInterface) { + return; + } + mIncrementalDecoderInterface->setDict(dict, bigram, dictRoot, rootPos); + } + + void setPrevWord(const int32_t *prevWord, int prevWordLength) { + if (!mIncrementalDecoderInterface) { + return; + } + mIncrementalDecoderInterface->setPrevWord(prevWord, prevWordLength); + } + + static void setGestureDecoderFactoryMethod( + IncrementalDecoderInterface *(*factoryMethod)(int, int)) { + sGestureDecoderFactoryMethod = factoryMethod; + } + + private: + DISALLOW_COPY_AND_ASSIGN(GestureDecoderWrapper); + static 
IncrementalDecoderInterface *getGestureDecoderInstance(int maxWordLength, int maxWords) { + if (sGestureDecoderFactoryMethod) { + return sGestureDecoderFactoryMethod(maxWordLength, maxWords); + } + return 0; + } + + static IncrementalDecoderInterface *(*sGestureDecoderFactoryMethod)(int, int); + IncrementalDecoderInterface *mIncrementalDecoderInterface; +}; +} // namespace latinime +#endif // LATINIME_GESTURE_DECODER_WRAPPER_H diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/gesture/incremental_decoder_interface.h new file mode 100644 index 000000000..6d2e273da --- /dev/null +++ b/native/jni/src/gesture/incremental_decoder_interface.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef LATINIME_INCREMENTAL_DECODER_INTERFACE_H
+#define LATINIME_INCREMENTAL_DECODER_INTERFACE_H
+
+#include <stdint.h>
+#include "defines.h"
+
+namespace latinime {
+
+class UnigramDictionary;
+class BigramDictionary;
+class ProximityInfo;
+
+// Abstract decoder interface: four pure virtual operations plus a virtual destructor so that
+// implementations can be deleted through a pointer to this type.
+// NOTE(review): implementations may be compiled separately against this header; keep the
+// order of the virtual declarations unchanged unless that is confirmed not to matter.
+class IncrementalDecoderInterface {
+ public:
+    // Parameter meanings are not visible from this header; presumably the input arrays hold
+    // inputSize entries each and the out* arrays receive the results -- TODO confirm.
+    virtual int getSuggestions(
+            ProximityInfo *pInfo, int *inputXs, int *inputYs, int *times, int *pointerIds,
+            int *codes, int inputSize, int commitPoint, unsigned short *outWords,
+            int *frequencies, int *outputIndices, int *outputTypes) = 0;
+    virtual void reset() = 0;
+    virtual void setDict(
+            const UnigramDictionary *dict, const BigramDictionary *bigram,
+            const uint8_t *dictRoot, int rootPos) = 0;
+    virtual void setPrevWord(const int32_t *prevWord, int prevWordLength) = 0;
+    virtual ~IncrementalDecoderInterface() {}
+};
+} // namespace latinime
+#endif // LATINIME_INCREMENTAL_DECODER_INTERFACE_H
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.cpp b/native/jni/src/gesture/incremental_decoder_wrapper.cpp
new file mode 100644
index 000000000..8fcda6c9e
--- /dev/null
+++ b/native/jni/src/gesture/incremental_decoder_wrapper.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "incremental_decoder_wrapper.h"
+
+namespace latinime {
+// Storage for the static factory hook declared in IncrementalDecoderWrapper. It starts out
+// null (0) and is only changed through setIncrementalDecoderFactoryMethod().
+IncrementalDecoderInterface *(*IncrementalDecoderWrapper::sIncrementalDecoderFactoryMethod)(
+        int, int) = 0;
+} // namespace latinime
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h
new file mode 100644
index 000000000..698061548
--- /dev/null
+++ b/native/jni/src/gesture/incremental_decoder_wrapper.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_INCREMENTAL_DECODER_WRAPPER_H
+#define LATINIME_INCREMENTAL_DECODER_WRAPPER_H
+
+#include <stdint.h>
+#include "defines.h"
+#include "incremental_decoder_interface.h"
+
+namespace latinime {
+
+class UnigramDictionary;
+class BigramDictionary;
+class ProximityInfo;
+
+// Forwards every IncrementalDecoderInterface call to a decoder obtained from the factory
+// registered through setIncrementalDecoderFactoryMethod(). The factory is consulted once, in
+// the constructor; when none is registered the wrapped pointer stays null, reset() and the
+// setters do nothing, and getSuggestions() returns 0. The wrapper deletes the decoder it holds.
+class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
+ public:
+    IncrementalDecoderWrapper(const int maxWordLength, const int maxWords)
+            : mIncrementalDecoderInterface(
+                    getIncrementalDecoderInstance(maxWordLength, maxWords)) {
+    }
+
+    virtual ~IncrementalDecoderWrapper() {
+        // Deleting a null pointer is a no-op, so the "no factory" case needs no check.
+        delete mIncrementalDecoderInterface;
+    }
+
+    // Delegates to the wrapped decoder; yields 0 when there is no decoder.
+    int getSuggestions(ProximityInfo *pInfo, int *inputXs, int *inputYs, int *times,
+            int *pointerIds, int *codes, int inputSize, int commitPoint,
+            unsigned short *outWords, int *frequencies, int *outputIndices, int *outputTypes) {
+        return mIncrementalDecoderInterface
+                ? mIncrementalDecoderInterface->getSuggestions(
+                        pInfo, inputXs, inputYs, times, pointerIds, codes, inputSize,
+                        commitPoint, outWords, frequencies, outputIndices, outputTypes)
+                : 0;
+    }
+
+    // Delegates to the wrapped decoder, if any.
+    void reset() {
+        if (mIncrementalDecoderInterface) {
+            mIncrementalDecoderInterface->reset();
+        }
+    }
+
+    // Delegates to the wrapped decoder, if any.
+    void setDict(const UnigramDictionary *dict, const BigramDictionary *bigram,
+            const uint8_t *dictRoot, int rootPos) {
+        if (mIncrementalDecoderInterface) {
+            mIncrementalDecoderInterface->setDict(dict, bigram, dictRoot, rootPos);
+        }
+    }
+
+    // Delegates to the wrapped decoder, if any.
+    void setPrevWord(const int32_t *prevWord, int prevWordLength) {
+        if (mIncrementalDecoderInterface) {
+            mIncrementalDecoderInterface->setPrevWord(prevWord, prevWordLength);
+        }
+    }
+
+    // Registers the function used by subsequently constructed wrappers to create their decoder.
+    static void setIncrementalDecoderFactoryMethod(
+            IncrementalDecoderInterface *(*factoryMethod)(int, int)) {
+        sIncrementalDecoderFactoryMethod = factoryMethod;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(IncrementalDecoderWrapper);
+    // Invokes the registered factory, or returns null (0) when none has been registered.
+    static IncrementalDecoderInterface *getIncrementalDecoderInstance(int maxWordLength,
+            int maxWords) {
+        return sIncrementalDecoderFactoryMethod
+                ? sIncrementalDecoderFactoryMethod(maxWordLength, maxWords) : 0;
+    }
+
+    static IncrementalDecoderInterface *(*sIncrementalDecoderFactoryMethod)(int, int);
+    IncrementalDecoderInterface *mIncrementalDecoderInterface;
+};
+} // namespace latinime
+#endif // LATINIME_INCREMENTAL_DECODER_WRAPPER_H
diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index 960d40119..393791f97 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -15,6 +15,7 @@ */ #include <assert.h> +#include <math.h> #include <stdio.h> #include <string> @@ -24,6 +25,7 @@ #include "defines.h" #include "dictionary.h" #include "proximity_info.h" +#include "proximity_info_state.h" namespace latinime { @@ -51,23 +53,14 @@ ProximityInfo::ProximityInfo(const std::string localeStr, const int maxProximity HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates && keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs && sweetSpotCenterYs && sweetSpotRadii), - mLocaleStr(localeStr), - mInputXCoordinates(0), mInputYCoordinates(0), - mTouchPositionCorrectionEnabled(false) { + mLocaleStr(localeStr) { const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE; - mProximityCharsArray = new int32_t[proximityGridLength]; - mInputCodes = new int32_t[MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH_INTERNAL]; if (DEBUG_PROXIMITY_INFO) { AKLOGI("Create proximity info array %d", proximityGridLength); } + mProximityCharsArray = new int32_t[proximityGridLength]; memcpy(mProximityCharsArray, proximityCharsArray, proximityGridLength * sizeof(mProximityCharsArray[0])); - const int normalizedSquaredDistancesLength = - MAX_PROXIMITY_CHARS_SIZE * MAX_WORD_LENGTH_INTERNAL; - mNormalizedSquaredDistances = new int[normalizedSquaredDistancesLength]; - for (int i = 0; i < normalizedSquaredDistancesLength; ++i) { -
mNormalizedSquaredDistances[i] = NOT_A_DISTANCE; - } copyOrFillZero(mKeyXCoordinates, keyXCoordinates, KEY_COUNT * sizeof(mKeyXCoordinates[0])); copyOrFillZero(mKeyYCoordinates, keyYCoordinates, KEY_COUNT * sizeof(mKeyYCoordinates[0])); @@ -96,9 +89,7 @@ void ProximityInfo::initializeCodeToKeyIndex() { } ProximityInfo::~ProximityInfo() { - delete[] mNormalizedSquaredDistances; delete[] mProximityCharsArray; - delete[] mInputCodes; } inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) const { @@ -119,26 +110,18 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { if (DEBUG_PROXIMITY_INFO) { AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y); } + int32_t *proximityCharsArray = mProximityCharsArray; for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) { if (DEBUG_PROXIMITY_INFO) { AKLOGI("Index: %d", mProximityCharsArray[startIndex + i]); } - if (mProximityCharsArray[startIndex + i] == KEYCODE_SPACE) { + if (proximityCharsArray[startIndex + i] == KEYCODE_SPACE) { return true; } } return false; } -bool ProximityInfo::isOnKey(const int keyId, const int x, const int y) const { - if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case - const int left = mKeyXCoordinates[keyId]; - const int top = mKeyYCoordinates[keyId]; - const int right = left + mKeyWidths[keyId] + 1; - const int bottom = top + mKeyHeights[keyId]; - return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom; -} - int ProximityInfo::squaredDistanceToEdge(const int keyId, const int x, const int y) const { if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case const int left = mKeyXCoordinates[keyId]; @@ -154,12 +137,13 @@ int ProximityInfo::squaredDistanceToEdge(const int keyId, const int x, const int void ProximityInfo::calculateNearbyKeyCodes( const int x, const int y, const int32_t primaryKey, int *inputCodes) const { + int32_t *proximityCharsArray = 
mProximityCharsArray; int insertPos = 0; inputCodes[insertPos++] = primaryKey; const int startIndex = getStartIndexFromCoordinates(x, y); if (startIndex >= 0) { for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) { - const int32_t c = mProximityCharsArray[startIndex + i]; + const int32_t c = proximityCharsArray[startIndex + i]; if (c < KEYCODE_SPACE || c == primaryKey) { continue; } @@ -187,7 +171,7 @@ void ProximityInfo::calculateNearbyKeyCodes( return; } - const int32_t* additionalProximityChars = + const int32_t *additionalProximityChars = AdditionalProximityChars::getAdditionalChars(&mLocaleStr, primaryKey); for (int j = 0; j < additionalProximitySize; ++j) { const int32_t ac = additionalProximityChars[j]; @@ -216,115 +200,6 @@ void ProximityInfo::calculateNearbyKeyCodes( } } -void ProximityInfo::setInputParams(const int32_t* inputCodes, const int inputLength, - const int* xCoordinates, const int* yCoordinates) { - memset(mInputCodes, 0, - MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE * sizeof(mInputCodes[0])); - - for (int i = 0; i < inputLength; ++i) { - const int32_t primaryKey = inputCodes[i]; - const int x = xCoordinates[i]; - const int y = yCoordinates[i]; - int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE]; - calculateNearbyKeyCodes(x, y, primaryKey, proximities); - } - - if (DEBUG_PROXIMITY_CHARS) { - for (int i = 0; i < inputLength; ++i) { - AKLOGI("---"); - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE; ++j) { - int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE + j]; - int icfjc = inputCodes[i * MAX_PROXIMITY_CHARS_SIZE + j]; - icc+= 0; - icfjc += 0; - AKLOGI("--- (%d)%c,%c", i, icc, icfjc); - AKLOGI("--- A<%d>,B<%d>", icc, icfjc); - } - } - } - //Keep for debug, sorry - //for (int i = 0; i < MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE; ++i) { - //if (i < inputLength * MAX_PROXIMITY_CHARS_SIZE) { - //mInputCodes[i] = mInputCodesFromJava[i]; - //} else { - // mInputCodes[i] = 0; - // } - //} - mInputXCoordinates = xCoordinates; 
- mInputYCoordinates = yCoordinates; - mTouchPositionCorrectionEnabled = - HAS_TOUCH_POSITION_CORRECTION_DATA && xCoordinates && yCoordinates; - mInputLength = inputLength; - for (int i = 0; i < inputLength; ++i) { - mPrimaryInputWord[i] = getPrimaryCharAt(i); - } - mPrimaryInputWord[inputLength] = 0; - if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- setInputParams"); - } - for (int i = 0; i < mInputLength; ++i) { - const int *proximityChars = getProximityCharsAt(i); - const int primaryKey = proximityChars[0]; - const int x = xCoordinates[i]; - const int y = yCoordinates[i]; - if (DEBUG_PROXIMITY_CHARS) { - int a = x + y + primaryKey; - a += 0; - AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); - // Keep debug code just in case - //int proximities[50]; - //for (int m = 0; m < 50; ++m) { - //proximities[m] = 0; - //} - //calculateNearbyKeyCodes(x, y, primaryKey, proximities); - //for (int l = 0; l < 50 && proximities[l] > 0; ++l) { - //if (DEBUG_PROXIMITY_CHARS) { - //AKLOGI("--- native Proximity (%d) = %c", l, proximities[l]); - //} - //} - } - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityChars[j] > 0; ++j) { - const int currentChar = proximityChars[j]; - const float squaredDistance = hasInputCoordinates() - ? calculateNormalizedSquaredDistance(getKeyIndex(currentChar), i) - : NOT_A_DISTANCE_FLOAT; - if (squaredDistance >= 0.0f) { - mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = - (int)(squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR); - } else { - mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = (j == 0) - ? 
EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO - : PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; - } - if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- Proximity (%d) = %c", j, currentChar); - } - } - } -} - -inline float square(const float x) { return x * x; } - -float ProximityInfo::calculateNormalizedSquaredDistance( - const int keyIndex, const int inputIndex) const { - if (keyIndex == NOT_AN_INDEX) { - return NOT_A_DISTANCE_FLOAT; - } - if (!hasSweetSpotData(keyIndex)) { - return NOT_A_DISTANCE_FLOAT; - } - if (NOT_A_COORDINATE == mInputXCoordinates[inputIndex]) { - return NOT_A_DISTANCE_FLOAT; - } - const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(keyIndex, inputIndex); - const float squaredRadius = square(mSweetSpotRadii[keyIndex]); - return squaredDistance / squaredRadius; -} - -bool ProximityInfo::hasInputCoordinates() const { - return mInputXCoordinates && mInputYCoordinates; -} - int ProximityInfo::getKeyIndex(const int c) const { if (KEY_COUNT == 0) { // We do not have the coordinate data @@ -337,131 +212,24 @@ int ProximityInfo::getKeyIndex(const int c) const { return mCodeToKeyIndex[baseLowerC]; } -float ProximityInfo::calculateSquaredDistanceFromSweetSpotCenter( - const int keyIndex, const int inputIndex) const { - const float sweetSpotCenterX = mSweetSpotCenterXs[keyIndex]; - const float sweetSpotCenterY = mSweetSpotCenterYs[keyIndex]; - const float inputX = (float)mInputXCoordinates[inputIndex]; - const float inputY = (float)mInputYCoordinates[inputIndex]; - return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY); -} - -inline const int* ProximityInfo::getProximityCharsAt(const int index) const { - return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE); -} - -unsigned short ProximityInfo::getPrimaryCharAt(const int index) const { - return getProximityCharsAt(index)[0]; -} - -inline bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const { - const int *chars = getProximityCharsAt(index); - int i = 0; - 
while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE) { - if (chars[i++] == c) { - return true; - } - } - return false; -} - -bool ProximityInfo::existsAdjacentProximityChars(const int index) const { - if (index < 0 || index >= mInputLength) return false; - const int currentChar = getPrimaryCharAt(index); - const int leftIndex = index - 1; - if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) { - return true; - } - const int rightIndex = index + 1; - if (rightIndex < mInputLength && existsCharInProximityAt(rightIndex, currentChar)) { - return true; - } - return false; -} - -// In the following function, c is the current character of the dictionary word -// currently examined. -// currentChars is an array containing the keys close to the character the -// user actually typed at the same position. We want to see if c is in it: if so, -// then the word contains at that position a character close to what the user -// typed. -// What the user typed is actually the first character of the array. -// proximityIndex is a pointer to the variable where getMatchedProximityId returns -// the index of c in the proximity chars of the input index. -// Notice : accented characters do not have a proximity list, so they are alone -// in their list. The non-accented version of the character should be considered -// "close", but not the other keys close to the non-accented version. -ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int index, - const unsigned short c, const bool checkProximityChars, int *proximityIndex) const { - const int *currentChars = getProximityCharsAt(index); - const int firstChar = currentChars[0]; - const unsigned short baseLowerC = toBaseLowerCase(c); - - // The first char in the array is what user typed. If it matches right away, - // that means the user typed that same char for this pos. 
- if (firstChar == baseLowerC || firstChar == c) { - return EQUIVALENT_CHAR; - } - - if (!checkProximityChars) return UNRELATED_CHAR; +// TODO: [Staging] Optimize +void ProximityInfo::getCenters(int *centerXs, int *centerYs, int *codeToKeyIndex, + int *keyToCodeIndex, int *keyCount, int *keyWidth) const { + *keyCount = KEY_COUNT; + *keyWidth = sqrt((float)MOST_COMMON_KEY_WIDTH_SQUARE); - // If the non-accented, lowercased version of that first character matches c, - // then we have a non-accented version of the accented character the user - // typed. Treat it as a close char. - if (toBaseLowerCase(firstChar) == baseLowerC) - return NEAR_PROXIMITY_CHAR; - - // Not an exact nor an accent-alike match: search the list of close keys - int j = 1; - while (j < MAX_PROXIMITY_CHARS_SIZE - && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); - if (matched) { - if (proximityIndex) { - *proximityIndex = j; - } - return NEAR_PROXIMITY_CHAR; - } - ++j; - } - if (j < MAX_PROXIMITY_CHARS_SIZE - && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - ++j; - while (j < MAX_PROXIMITY_CHARS_SIZE - && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); - if (matched) { - if (proximityIndex) { - *proximityIndex = j; - } - return ADDITIONAL_PROXIMITY_CHAR; - } - ++j; - } - } - - // Was not included, signal this as an unrelated character. 
- return UNRELATED_CHAR; -} - -bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const { - if (length != mInputLength) { - return false; - } - const int *inputCodes = mInputCodes; - while (length--) { - if ((unsigned int) *inputCodes != (unsigned int) *word) { - return false; + for (int i = 0; i < KEY_COUNT; ++i) { + const int code = mKeyCharCodes[i]; + const int lowerCode = toBaseLowerCase(code); + centerXs[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2; + centerYs[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2; + codeToKeyIndex[code] = i; + if (code != lowerCode && lowerCode >= 0 && lowerCode <= MAX_CHAR_CODE) { + codeToKeyIndex[lowerCode] = i; + keyToCodeIndex[i] = lowerCode; + } else { + keyToCodeIndex[i] = code; } - inputCodes += MAX_PROXIMITY_CHARS_SIZE; - word++; } - return true; } - -const int ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; -const int ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; -const int ProximityInfo::MAX_KEY_COUNT_IN_A_KEYBOARD; -const int ProximityInfo::MAX_CHAR_CODE; - } // namespace latinime diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index feb0c9444..abd07dd3e 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -28,22 +28,6 @@ class Correction; class ProximityInfo { public: - static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; - static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = - 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; - - // Used as a return value for character comparison - typedef enum { - // Same char, possibly with different case or accent - EQUIVALENT_CHAR, - // It is a char located nearby on the keyboard - NEAR_PROXIMITY_CHAR, - // It is an unrelated char - UNRELATED_CHAR, - // Additional proximity char which can differ by language. 
- ADDITIONAL_PROXIMITY_CHAR - } ProximityType; - ProximityInfo(const std::string localeStr, const int maxProximityCharsSize, const int keyboardWidth, const int keyboardHeight, const int gridWidth, const int gridHeight, const int mostCommonkeyWidth, @@ -53,26 +37,73 @@ class ProximityInfo { const float *sweetSpotCenterYs, const float *sweetSpotRadii); ~ProximityInfo(); bool hasSpaceProximity(const int x, const int y) const; - void setInputParams(const int32_t *inputCodes, const int inputLength, - const int *xCoordinates, const int *yCoordinates); - const int* getProximityCharsAt(const int index) const; - unsigned short getPrimaryCharAt(const int index) const; - bool existsCharInProximityAt(const int index, const int c) const; - bool existsAdjacentProximityChars(const int index) const; - ProximityType getMatchedProximityId(const int index, const unsigned short c, - const bool checkProximityChars, int *proximityIndex = 0) const; - int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const { - return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex]; - } + int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const; bool sameAsTyped(const unsigned short *word, int length) const; - const unsigned short* getPrimaryInputWord() const { - return mPrimaryInputWord; + int squaredDistanceToEdge(const int keyId, const int x, const int y) const; + bool isOnKey(const int keyId, const int x, const int y) const { + if (keyId < 0) return true; // NOT_A_ID is -1, but return whenever < 0 just in case + const int left = mKeyXCoordinates[keyId]; + const int top = mKeyYCoordinates[keyId]; + const int right = left + mKeyWidths[keyId] + 1; + const int bottom = top + mKeyHeights[keyId]; + return left < right && top < bottom && x >= left && x < right && y >= top && y < bottom; + } + int getKeyIndex(const int c) const; + bool hasSweetSpotData(const int keyIndex) const { + // When there are no calibration 
data for a key, + // the radius of the key is assigned to zero. + return mSweetSpotRadii[keyIndex] > 0.0; + } + float getSweetSpotRadiiAt(int keyIndex) const { + return mSweetSpotRadii[keyIndex]; + } + float getSweetSpotCenterXAt(int keyIndex) const { + return mSweetSpotCenterXs[keyIndex]; } - bool touchPositionCorrectionEnabled() const { - return mTouchPositionCorrectionEnabled; + float getSweetSpotCenterYAt(int keyIndex) const { + return mSweetSpotCenterYs[keyIndex]; + } + void calculateNearbyKeyCodes( + const int x, const int y, const int32_t primaryKey, int *inputCodes) const; + + bool hasTouchPositionCorrectionData() const { + return HAS_TOUCH_POSITION_CORRECTION_DATA; + } + + int getMostCommonKeyWidthSquare() const { + return MOST_COMMON_KEY_WIDTH_SQUARE; + } + + std::string getLocaleStr() const { + return mLocaleStr; + } + + int getKeyCount() const { + return KEY_COUNT; } + int getCellHeight() const { + return CELL_HEIGHT; + } + + int getCellWidth() const { + return CELL_WIDTH; + } + + int getGridWidth() const { + return GRID_WIDTH; + } + + int getGridHeight() const { + return GRID_HEIGHT; + } + + // Returns the keyboard key-center information. + void getCenters(int *centersX, int *centersY, int *codeToKeyIndex, int *keyToCodeIndex, + int *keyCount, int *keyWidth) const; + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); // The max number of the keys in one keyboard layout static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64; // The upper limit of the char code in mCodeToKeyIndex @@ -86,16 +117,6 @@ class ProximityInfo { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; bool hasInputCoordinates() const; - int getKeyIndex(const int c) const; - bool hasSweetSpotData(const int keyIndex) const { - // When there are no calibration data for a key, - // the radius of the key is assigned to zero. 
- return mSweetSpotRadii[keyIndex] > 0.0; - } - bool isOnKey(const int keyId, const int x, const int y) const; - int squaredDistanceToEdge(const int keyId, const int x, const int y) const; - void calculateNearbyKeyCodes( - const int x, const int y, const int32_t primaryKey, int *inputCodes) const; const int MAX_PROXIMITY_CHARS_SIZE; const int KEYBOARD_WIDTH; @@ -108,14 +129,7 @@ class ProximityInfo { const int KEY_COUNT; const bool HAS_TOUCH_POSITION_CORRECTION_DATA; const std::string mLocaleStr; - // TODO: remove this - const int *mInputCodesFromJava; - int32_t *mInputCodes; - const int *mInputXCoordinates; - const int *mInputYCoordinates; - bool mTouchPositionCorrectionEnabled; int32_t *mProximityCharsArray; - int *mNormalizedSquaredDistances; int32_t mKeyXCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD]; @@ -124,11 +138,8 @@ class ProximityInfo { float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; - int mInputLength; - unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; int mCodeToKeyIndex[MAX_CHAR_CODE + 1]; + // TODO: move to correction.h }; - } // namespace latinime - #endif // LATINIME_PROXIMITY_INFO_H diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp new file mode 100644 index 000000000..d481a3bc7 --- /dev/null +++ b/native/jni/src/proximity_info_state.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <string>
+
+#define LOG_TAG "LatinIME: proximity_info_state.cpp"
+
+#include "additional_proximity_chars.h"
+#include "defines.h"
+#include "dictionary.h"
+#include "proximity_info.h"
+#include "proximity_info_state.h"
+
+namespace latinime {
+// (Re)initializes this state for one input sequence.
+//
+// Steps, in order:
+//  1. Caches keyboard properties read from proximityInfo.
+//  2. Resets every normalized distance to NOT_A_DISTANCE and zeroes mInputCodes.
+//  3. For each of the inputLength positions, fills that position's proximity list through
+//     ProximityInfo::calculateNearbyKeyCodes().
+//  4. Stores the coordinate pointers (the arrays are not copied, so they presumably have to
+//     outlive this state's use of them -- confirm with callers) and the primary input word.
+//  5. Fills mNormalizedSquaredDistances for every non-zero proximity char: the distance scaled
+//     by NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR when it can be computed, otherwise one of
+//     the *_WITHOUT_DISTANCE_INFO markers.
+//
+// inputCodes is read as one code per input position (inputCodes[i]).
+// NOTE(review): xCoordinates / yCoordinates are dereferenced unconditionally below even though
+// later code tolerates them being null (see hasInputCoordinates()); confirm callers never pass
+// null. inputLength is not range-checked here against the member buffers either.
+void ProximityInfoState::initInputParams(
+        const ProximityInfo *proximityInfo, const int32_t *inputCodes, const int inputLength,
+        const int *xCoordinates, const int *yCoordinates) {
+    mProximityInfo = proximityInfo;
+    mHasTouchPositionCorrectionData = proximityInfo->hasTouchPositionCorrectionData();
+    mMostCommonKeyWidthSquare = proximityInfo->getMostCommonKeyWidthSquare();
+    mLocaleStr = proximityInfo->getLocaleStr();
+    mKeyCount = proximityInfo->getKeyCount();
+    mCellHeight = proximityInfo->getCellHeight();
+    mCellWidth = proximityInfo->getCellWidth();
+    // Fix: height and width were previously read from each other's getter.
+    mGridHeight = proximityInfo->getGridHeight();
+    mGridWidth = proximityInfo->getGridWidth();
+    const int normalizedSquaredDistancesLength =
+            MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL;
+    for (int i = 0; i < normalizedSquaredDistancesLength; ++i) {
+        mNormalizedSquaredDistances[i] = NOT_A_DISTANCE;
+    }
+
+    memset(mInputCodes, 0,
+            MAX_WORD_LENGTH_INTERNAL * MAX_PROXIMITY_CHARS_SIZE_INTERNAL * sizeof(mInputCodes[0]));
+
+    for (int i = 0; i < inputLength; ++i) {
+        const int32_t primaryKey = inputCodes[i];
+        const int x = xCoordinates[i];
+        const int y = yCoordinates[i];
+        int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
+        mProximityInfo->calculateNearbyKeyCodes(x, y, primaryKey, proximities);
+    }
+
+    if (DEBUG_PROXIMITY_CHARS) {
+        for (int i = 0; i < inputLength; ++i) {
+            AKLOGI("---");
+            for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL; ++j) {
+                int icc = mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j];
+                // Fix: inputCodes holds one entry per input position, so it must be indexed
+                // with i only; the former proximity-sized stride read past the end of it.
+                int icfjc = inputCodes[i];
+                // The "+= 0" lines keep the locals "used" when AKLOGI expands to nothing.
+                icc += 0;
+                icfjc += 0;
+                AKLOGI("--- (%d)%c,%c", i, icc, icfjc);
+                AKLOGI("--- A<%d>,B<%d>", icc, icfjc);
+            }
+        }
+    }
+    mInputXCoordinates = xCoordinates;
+    mInputYCoordinates = yCoordinates;
+    mTouchPositionCorrectionEnabled =
+            mHasTouchPositionCorrectionData && xCoordinates && yCoordinates;
+    mInputLength = inputLength;
+    for (int i = 0; i < inputLength; ++i) {
+        mPrimaryInputWord[i] = getPrimaryCharAt(i);
+    }
+    mPrimaryInputWord[inputLength] = 0;
+    if (DEBUG_PROXIMITY_CHARS) {
+        AKLOGI("--- initInputParams");
+    }
+    for (int i = 0; i < mInputLength; ++i) {
+        const int *proximityChars = getProximityCharsAt(i);
+        const int primaryKey = proximityChars[0];
+        const int x = xCoordinates[i];
+        const int y = yCoordinates[i];
+        if (DEBUG_PROXIMITY_CHARS) {
+            int a = x + y + primaryKey;
+            a += 0;
+            AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
+        }
+        // A proximity list ends at its first entry that is <= 0.
+        for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
+            const int currentChar = proximityChars[j];
+            const float squaredDistance =
+                    hasInputCoordinates() ? calculateNormalizedSquaredDistance(
+                            mProximityInfo->getKeyIndex(currentChar), i) :
+                            NOT_A_DISTANCE_FLOAT;
+            if (squaredDistance >= 0.0f) {
+                mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
+                        (int) (squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
+            } else {
+                // No usable distance: j == 0 is the typed key itself, the rest are neighbours.
+                mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
+                        (j == 0) ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO :
+                                PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
+            }
+            if (DEBUG_PROXIMITY_CHARS) {
+                AKLOGI("--- Proximity (%d) = %c", j, currentChar);
+            }
+        }
+    }
+}
+
+// Returns the squared distance between input point inputIndex and the sweet spot center of key
+// keyIndex, divided by the squared sweet spot radius. Returns NOT_A_DISTANCE_FLOAT when
+// keyIndex is NOT_AN_INDEX, when the key has no sweet spot data, or when the input X
+// coordinate is NOT_A_COORDINATE. hasSweetSpotData() requires radius > 0, so the division
+// below never divides by zero.
+float ProximityInfoState::calculateNormalizedSquaredDistance(
+        const int keyIndex, const int inputIndex) const {
+    if (keyIndex == NOT_AN_INDEX) {
+        return NOT_A_DISTANCE_FLOAT;
+    }
+    if (!mProximityInfo->hasSweetSpotData(keyIndex)) {
+        return NOT_A_DISTANCE_FLOAT;
+    }
+    if (NOT_A_COORDINATE == mInputXCoordinates[inputIndex]) {
+        return NOT_A_DISTANCE_FLOAT;
+    }
+    const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(
+            keyIndex, inputIndex);
+    const float squaredRadius = square(mProximityInfo->getSweetSpotRadiiAt(keyIndex));
+    return squaredDistance / squaredRadius;
+}
+
+// Squared Euclidean distance between input point inputIndex and the sweet spot center of key
+// keyIndex. Performs no validity checks of its own; both stored coordinate arrays are read.
+float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(
+        const int keyIndex, const int inputIndex) const {
+    const float sweetSpotCenterX = mProximityInfo->getSweetSpotCenterXAt(keyIndex);
+    const float sweetSpotCenterY = mProximityInfo->getSweetSpotCenterYAt(keyIndex);
+    const float inputX = (float)mInputXCoordinates[inputIndex];
+    const float inputY = (float)mInputYCoordinates[inputIndex];
+    return square(inputX - sweetSpotCenterX) + square(inputY - sweetSpotCenterY);
+}
+} // namespace latinime
diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h
new file mode 100644
index 000000000..a5fe528c1
--- /dev/null
+++ b/native/jni/src/proximity_info_state.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PROXIMITY_INFO_STATE_H +#define LATINIME_PROXIMITY_INFO_STATE_H + +#include <assert.h> +#include <stdint.h> +#include <string> + +#include "additional_proximity_chars.h" +#include "char_utils.h" +#include "defines.h" + +namespace latinime { + +class ProximityInfo; + +class ProximityInfoState { + public: + static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10; + static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR = + 1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2; + // The upper limit of the char code in mCodeToKeyIndex + static const int MAX_CHAR_CODE = 127; + static const float NOT_A_DISTANCE_FLOAT = -1.0f; + static const int NOT_A_CODE = -1; + + ///////////////////////////////////////// + // Defined in proximity_info_state.cpp // + ///////////////////////////////////////// + void initInputParams( + const ProximityInfo *proximityInfo, const int32_t *inputCodes, const int inputLength, + const int *xCoordinates, const int *yCoordinates); + + ///////////////////////////////////////// + // Defined here // + ///////////////////////////////////////// + ProximityInfoState() {}; + inline const int *getProximityCharsAt(const int index) const { + return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); + } + + inline unsigned short getPrimaryCharAt(const int index) const { + return getProximityCharsAt(index)[0]; + } + + inline bool existsCharInProximityAt(const int index, const int c) const { + const int *chars = getProximityCharsAt(index); + int i = 0; + while (chars[i] > 0 && i < 
MAX_PROXIMITY_CHARS_SIZE_INTERNAL) { + if (chars[i++] == c) { + return true; + } + } + return false; + } + + inline bool existsAdjacentProximityChars(const int index) const { + if (index < 0 || index >= mInputLength) return false; + const int currentChar = getPrimaryCharAt(index); + const int leftIndex = index - 1; + if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) { + return true; + } + const int rightIndex = index + 1; + if (rightIndex < mInputLength && existsCharInProximityAt(rightIndex, currentChar)) { + return true; + } + return false; + } + + // In the following function, c is the current character of the dictionary word + // currently examined. + // currentChars is an array containing the keys close to the character the + // user actually typed at the same position. We want to see if c is in it: if so, + // then the word contains at that position a character close to what the user + // typed. + // What the user typed is actually the first character of the array. + // proximityIndex is a pointer to the variable where getMatchedProximityId returns + // the index of c in the proximity chars of the input index. + // Notice : accented characters do not have a proximity list, so they are alone + // in their list. The non-accented version of the character should be considered + // "close", but not the other keys close to the non-accented version. + inline ProximityType getMatchedProximityId(const int index, + const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const { + const int *currentChars = getProximityCharsAt(index); + const int firstChar = currentChars[0]; + const unsigned short baseLowerC = toBaseLowerCase(c); + + // The first char in the array is what user typed. If it matches right away, + // that means the user typed that same char for this pos. 
+ if (firstChar == baseLowerC || firstChar == c) { + return EQUIVALENT_CHAR; + } + + if (!checkProximityChars) return UNRELATED_CHAR; + + // If the non-accented, lowercased version of that first character matches c, + // then we have a non-accented version of the accented character the user + // typed. Treat it as a close char. + if (toBaseLowerCase(firstChar) == baseLowerC) + return NEAR_PROXIMITY_CHAR; + + // Not an exact nor an accent-alike match: search the list of close keys + int j = 1; + while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return NEAR_PROXIMITY_CHAR; + } + ++j; + } + if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + ++j; + while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return ADDITIONAL_PROXIMITY_CHAR; + } + ++j; + } + } + + // Was not included, signal this as an unrelated character. 
+ return UNRELATED_CHAR; + } + + inline int getNormalizedSquaredDistance( + const int inputIndex, const int proximityIndex) const { + return mNormalizedSquaredDistances[ + inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex]; + } + + inline const unsigned short *getPrimaryInputWord() const { + return mPrimaryInputWord; + } + + inline bool touchPositionCorrectionEnabled() const { + return mTouchPositionCorrectionEnabled; + } + + private: + DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); + ///////////////////////////////////////// + // Defined in proximity_info_state.cpp // + ///////////////////////////////////////// + float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const; + + float calculateSquaredDistanceFromSweetSpotCenter( + const int keyIndex, const int inputIndex) const; + + ///////////////////////////////////////// + // Defined here // + ///////////////////////////////////////// + inline float square(const float x) const { return x * x; } + + bool hasInputCoordinates() const { + return mInputXCoordinates && mInputYCoordinates; + } + + bool sameAsTyped(const unsigned short *word, int length) const { + if (length != mInputLength) { + return false; + } + const int *inputCodes = mInputCodes; + while (length--) { + if ((unsigned int) *inputCodes != (unsigned int) *word) { + return false; + } + inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; + word++; + } + return true; + } + + // const + const ProximityInfo *mProximityInfo; + bool mHasTouchPositionCorrectionData; + int mMostCommonKeyWidthSquare; + std::string mLocaleStr; + int mKeyCount; + int mCellHeight; + int mCellWidth; + int mGridHeight; + int mGridWidth; + + const int *mInputXCoordinates; + const int *mInputYCoordinates; + bool mTouchPositionCorrectionEnabled; + int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; + int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; + int mInputLength; + 
unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; +}; +} // namespace latinime +#endif // LATINIME_PROXIMITY_INFO_STATE_H diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index 9a803cca1..755635fba 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -29,12 +29,12 @@ namespace latinime { class TerminalAttributes { public: class ShortcutIterator { - const uint8_t* const mDict; + const uint8_t *const mDict; bool mHasNextShortcutTarget; int mPos; public: - ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict), + ShortcutIterator(const uint8_t *dict, const int pos, const uint8_t flags) : mDict(dict), mPos(pos) { mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS)); } @@ -46,7 +46,7 @@ class TerminalAttributes { // Gets the shortcut target itself as a uint16_t string. For parameters and return value // see BinaryFormat::getWordAtAddress. // TODO: make the output an uint32_t* to handle the whole unicode range. 
- inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) { + inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT); @@ -62,12 +62,13 @@ class TerminalAttributes { }; private: - const uint8_t* const mDict; + DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); + const uint8_t *const mDict; const uint8_t mFlags; const int mStartPos; public: - TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) : + TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) : mDict(dict), mFlags(flags), mStartPos(pos) { } @@ -78,5 +79,4 @@ class TerminalAttributes { } }; } // namespace latinime - #endif // LATINIME_TERMINAL_ATTRIBUTES_H diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index ea9f11b2c..b121d08a8 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -1,19 +1,19 @@ /* -** -** Copyright 2010, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ + * + * Copyright 2010, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include <assert.h> #include <string.h> @@ -40,7 +40,7 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[ { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE // TODO: check the header -UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier, +UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags) : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier), @@ -68,7 +68,7 @@ static inline void addWord( // Return the replacement code point for a digraph, or 0 if none. 
int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int codesSize, - const digraph_t* const digraphs, const unsigned int digraphsSize) const { + const digraph_t *const digraphs, const unsigned int digraphsSize) const { // There can't be a digraph if we don't have at least 2 characters to examine if (i + 2 > codesSize) return false; @@ -103,7 +103,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, - const digraph_t* const digraphs, const unsigned int digraphsSize) { + const digraph_t *const digraphs, const unsigned int digraphsSize) const { const int startIndex = codesDest - codesBuffer; if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) { @@ -170,14 +170,16 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter // in bigram_dictionary.cpp int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, - WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, + const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies) { + const bool useFullEditDistance, unsigned short *outWords, int *frequencies, + int *outputTypes) const { - queuePool->clearAll(); - Correction* masterCorrection = correction; - correction->resetCorrection(); + WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH); + queuePool.clearAll(); + Correction masterCorrection; + masterCorrection.resetCorrection(); if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS) { // Incrementally tune the word and try all 
possibilities int codesBuffer[getCodesBufferSize(codes, codesSize)]; @@ -185,8 +187,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, - useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, - queuePool, GERMAN_UMLAUT_DIGRAPHS, + useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection, + &queuePool, GERMAN_UMLAUT_DIGRAPHS, sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0])); } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { int codesBuffer[getCodesBufferSize(codes, codesSize)]; @@ -194,33 +196,33 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, - useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, - queuePool, FRENCH_LIGATURES_DIGRAPHS, + useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection, + &queuePool, FRENCH_LIGATURES_DIGRAPHS, sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0])); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, - bigramMap, bigramFilter, useFullEditDistance, masterCorrection, queuePool); + bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool); } PROF_START(20); if (DEBUG_DICT) { - float ns = queuePool->getMasterQueue()->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); + float ns = queuePool.getMasterQueue()->getHighestNormalizedScore( + masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0); ns += 0; AKLOGI("Max normalized score = %f", ns); } const 
int suggestedWordsCount = - queuePool->getMasterQueue()->outputSuggestions( - proximityInfo->getPrimaryInputWord(), codesSize, frequencies, outWords); + queuePool.getMasterQueue()->outputSuggestions( + masterCorrection.getPrimaryInputWord(), codesSize, frequencies, outWords); if (DEBUG_DICT) { - float ns = queuePool->getMasterQueue()->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); + float ns = queuePool.getMasterQueue()->getHighestNormalizedScore( + masterCorrection.getPrimaryInputWord(), codesSize, 0, 0, 0); ns += 0; AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { - short unsigned int* w = outWords + j * MAX_WORD_LENGTH; + short unsigned int *w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; (void)s; @@ -235,7 +237,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) { + const bool useFullEditDistance, Correction *correction, + WordsPriorityQueuePool *queuePool) const { PROF_OPEN; PROF_START(0); @@ -256,10 +259,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_START(4); bool hasAutoCorrectionCandidate = false; - WordsPriorityQueue* masterQueue = queuePool->getMasterQueue(); + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); if (masterQueue->size() > 0) { float nsForMaster = masterQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); + correction->getPrimaryInputWord(), inputLength, 0, 0, 0); hasAutoCorrectionCandidate = (nsForMaster > 
START_TWO_WORDS_CORRECTION_THRESHOLD); } PROF_END(4); @@ -281,18 +284,18 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); + WordsPriorityQueue *queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { - WordsPriorityQueue::SuggestedWord* sw = queue->top(); + WordsPriorityQueue::SuggestedWord *sw = queue->top(); const int score = sw->mScore; - const unsigned short* word = sw->mWord; + const unsigned short *word = sw->mWord; const int wordLength = sw->mWordLength; float ns = Correction::RankingAlgorithm::calcNormalizedScore( - proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); + correction->getPrimaryInputWord(), i, word, wordLength, score); ns += 0; AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns, (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD)); - DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); + DUMP_WORD(correction->getPrimaryInputWord(), i); DUMP_WORD(word, wordLength); } } @@ -300,12 +303,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, - const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) { + const int *yCoordinates, const int *codes, const int inputLength, + Correction *correction) const { if (DEBUG_DICT) { AKLOGI("initSuggest"); DUMP_WORD_INT(codes, inputLength); } - proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); + correction->initInputParams(proximityInfo, codes, inputLength, xCoordinates, yCoordinates); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); } @@ -317,7 +321,7 @@ void 
UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool *queuePool) { + Correction *correction, WordsPriorityQueuePool *queuePool) const { initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction, queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); @@ -326,7 +330,7 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, WordsPriorityQueuePool *queuePool, - const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) { + const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const { uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount(); if (DEBUG_DICT) { AKLOGI("Traverse count %d", totalTraverseCount); @@ -374,12 +378,12 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, inline void UnigramDictionary::onTerminal(const int probability, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, - const int currentWordIndex) { + const int currentWordIndex) const { const int inputIndex = correction->getInputIndex(); const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; int wordLength; - unsigned short* wordPointer; + unsigned short *wordPointer; if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); @@ -426,11 
+430,11 @@ inline void UnigramDictionary::onTerminal(const int probability, int UnigramDictionary::getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool* queuePool, const int inputLength, + WordsPriorityQueuePool *queuePool, const int inputLength, const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) { + int*wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_ABORT; } @@ -473,17 +477,18 @@ int UnigramDictionary::getSubStringSuggestion( // TODO: Remove the safety net above // ////////////////////////////////////////////// - unsigned short* tempOutputWord = 0; + unsigned short *tempOutputWord = 0; int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); + unsigned short word[MAX_WORD_LENGTH_INTERNAL]; int freq = getMostFrequentWordLike( - inputWordStartPos, inputWordLength, proximityInfo, mWord); + inputWordStartPos, inputWordLength, correction, word); if (freq > 0) { nextWordLength = inputWordLength; - tempOutputWord = mWord; + tempOutputWord = word; } else if (!hasAutoCorrectionCandidate) { if (inputWordStartPos > 0) { const int offset = inputWordStartPos; @@ -503,14 +508,14 @@ int UnigramDictionary::getSubStringSuggestion( } } } - WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); + WordsPriorityQueue *queue = queuePool->getSubQueue(currentWordIndex, inputWordLength); // TODO: Return the correct value depending on doAutoCompletion if (!queue || queue->size() 
<= 0) { return FLAG_MULTIPLE_SUGGEST_ABORT; } int score = 0; const float ns = queue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), inputWordLength, + correction->getPrimaryInputWord(), inputWordLength, &tempOutputWord, &score, &nextWordLength); if (DEBUG_DICT) { AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); @@ -574,10 +579,10 @@ int UnigramDictionary::getSubStringSuggestion( void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, - const int outputWordLength, int *freqArray, int* wordLengthArray, - unsigned short* outputWord) { + const int outputWordLength, int *freqArray, int *wordLengthArray, + unsigned short *outputWord) const { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; @@ -655,8 +660,8 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, - const bool hasAutoCorrectionCandidate) { + Correction *correction, WordsPriorityQueuePool *queuePool, + const bool hasAutoCorrectionCandidate) const { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { AKLOGI("--- Suggest multiple words"); @@ -678,11 +683,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. 
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, - const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) { + const int inputLength, Correction *correction, unsigned short *word) const { uint16_t inWord[inputLength]; for (int i = 0; i < inputLength; ++i) { - inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i); + inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i); } return getMostFrequentWordLikeInner(inWord, inputLength, word); } @@ -700,9 +705,9 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, // In and out parameters may point to the same location. This function takes care // not to use any input parameters after it wrote into its outputs. static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, - const uint8_t* const root, const int startPos, - const uint16_t* const inWord, const int startInputIndex, - int32_t* outNewWord, int* outInputIndex, int* outPos) { + const uint8_t *const root, const int startPos, + const uint16_t *const inWord, const int startInputIndex, + int32_t *outNewWord, int *outInputIndex, int *outPos) { const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); @@ -738,8 +743,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, // It will compare the frequency to the max frequency, and if greater, will // copy the word into the output buffer. In output value maxFreq, it will // write the new maximum frequency if it changed. 
-static inline void onTerminalWordLike(const int freq, int32_t* newWord, const int length, - short unsigned int* outWord, int* maxFreq) { +static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, + short unsigned int *outWord, int *maxFreq) { if (freq > *maxFreq) { for (int q = 0; q < length; ++q) outWord[q] = newWord[q]; @@ -750,22 +755,25 @@ static inline void onTerminalWordLike(const int freq, int32_t* newWord, const in // Will find the highest frequency of the words like the one passed as an argument, // that is, everything that only differs by case/accents. -int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWord, - const int length, short unsigned int* outWord) { +int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, + const int length, short unsigned int *outWord) const { int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; int depth = 0; int maxFreq = -1; - const uint8_t* const root = DICT_ROOT; + const uint8_t *const root = DICT_ROOT; + int stackChildCount[MAX_WORD_LENGTH_INTERNAL]; + int stackInputIndex[MAX_WORD_LENGTH_INTERNAL]; + int stackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; int startPos = 0; - mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos); - mStackInputIndex[0] = 0; - mStackSiblingPos[0] = startPos; + stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos); + stackInputIndex[0] = 0; + stackSiblingPos[0] = startPos; while (depth >= 0) { - const int charGroupCount = mStackChildCount[depth]; - int pos = mStackSiblingPos[depth]; + const int charGroupCount = stackChildCount[depth]; + int pos = stackSiblingPos[depth]; for (int charGroupIndex = charGroupCount - 1; charGroupIndex >= 0; --charGroupIndex) { - int inputIndex = mStackInputIndex[depth]; + int inputIndex = stackInputIndex[depth]; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); // Test whether all chars in this group match with the 
word we are searching for. If so, // we want to traverse its children (or if the length match, evaluate its frequency). @@ -785,15 +793,15 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor // anyway, so don't traverse unless inputIndex < length. if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) { // Save position for this depth, to get back to this once children are done - mStackChildCount[depth] = charGroupIndex; - mStackSiblingPos[depth] = siblingPos; + stackChildCount[depth] = charGroupIndex; + stackSiblingPos[depth] = siblingPos; // Prepare stack values for next depth ++depth; int childrenPos = childrenNodePos; - mStackChildCount[depth] = + stackChildCount[depth] = BinaryFormat::getGroupCountAndForwardPointer(root, &childrenPos); - mStackSiblingPos[depth] = childrenPos; - mStackInputIndex[depth] = inputIndex; + stackSiblingPos[depth] = childrenPos; + stackInputIndex[depth] = inputIndex; pos = childrenPos; // Go to the next depth level. 
++depth; @@ -808,9 +816,10 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor return maxFreq; } -int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const { - const uint8_t* const root = DICT_ROOT; - int pos = BinaryFormat::getTerminalPosition(root, inWord, length); +int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const { + const uint8_t *const root = DICT_ROOT; + int pos = BinaryFormat::getTerminalPosition(root, inWord, length, + false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == pos) { return NOT_A_PROBABILITY; } @@ -848,7 +857,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs inline bool UnigramDictionary::processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, - WordsPriorityQueuePool *queuePool, const int currentWordIndex) { + WordsPriorityQueuePool *queuePool, const int currentWordIndex) const { if (DEBUG_DICT) { correction->checkState(); } @@ -982,5 +991,4 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, *newChildrenPosition = childrenPos; return true; } - } // namespace latinime diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index a1a8299e5..f6aef2bfd 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -73,80 +73,80 @@ class UnigramDictionary { static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0; static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1; static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; - UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, + UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - int getFrequency(const int32_t* const 
inWord, const int length) const; + int getFrequency(const int32_t *const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, - Correction *correction, const int *xcoordinates, const int *ycoordinates, + int getSuggestions( + ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords, - int *frequencies); + int *frequencies, int *outputTypes) const; virtual ~UnigramDictionary(); private: + DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary); void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool); + WordsPriorityQueuePool *queuePool) const; int getDigraphReplacement(const int *codes, const int i, const int codesSize, - const digraph_t* const digraphs, const unsigned int digraphsSize) const; + const digraph_t *const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, - const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, + const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, const int* codesSrc, const int codesRemain, - const int currentDepth, int* codesDest, Correction *correction, - WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs, - const unsigned int digraphsSize); + const bool 
useFullEditDistance, const int *codesSrc, const int codesRemain, + const int currentDepth, int *codesDest, Correction *correction, + WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, + const unsigned int digraphsSize) const; void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); + const int *ycoordinates, const int *codes, const int codesSize, + Correction *correction) const; void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool); + Correction *correction, WordsPriorityQueuePool *queuePool) const; void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, - const int maxErrors, const int currentWordIndex); + Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, + const int maxErrors, const int currentWordIndex) const; void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, - const bool hasAutoCorrectionCandidate); + Correction *correction, WordsPriorityQueuePool *queuePool, + const bool hasAutoCorrectionCandidate) const; void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, - const int currentWordIndex); - bool needsToSkipCurrentNode(const unsigned short c, - const int 
inputIndex, const int skipPos, const int depth); + const int currentWordIndex) const; // Process a node by considering proximity, missing and excessive character bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, - const int currentWordIndex); + const int currentWordIndex) const; int getMostFrequentWordLike(const int startInputIndex, const int inputLength, - ProximityInfo *proximityInfo, unsigned short *word); - int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, - short unsigned int *outWord); + Correction *correction, unsigned short *word) const; + int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length, + short unsigned int *outWord) const; int getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool* queuePool, const int inputLength, + WordsPriorityQueuePool *queuePool, const int inputLength, const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int *wordLengthArray, unsigned short* outputWord, int *outputWordLength); + int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const; void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, - Correction *correction, WordsPriorityQueuePool* queuePool, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, - const int outputWordLength, int *freqArray, int* 
wordLengthArray, - unsigned short* outputWord); + const int outputWordLength, int *freqArray, int *wordLengthArray, + unsigned short *outputWord) const; - const uint8_t* const DICT_ROOT; + const uint8_t *const DICT_ROOT; const int MAX_WORD_LENGTH; const int MAX_WORDS; const int TYPED_LETTER_MULTIPLIER; @@ -158,13 +158,6 @@ class UnigramDictionary { static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; - - // Still bundled members - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove - int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove - int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove - int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove }; } // namespace latinime - #endif // LATINIME_UNIGRAM_DICTIONARY_H diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index 7629251d6..fc9c55e63 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -33,7 +33,7 @@ class WordsPriorityQueue { int mWordLength; bool mUsed; - void setParams(int score, unsigned short* word, int wordLength) { + void setParams(int score, unsigned short *word, int wordLength) { mScore = score; mWordLength = wordLength; memcpy(mWord, word, sizeof(unsigned short) * wordLength); @@ -55,8 +55,8 @@ class WordsPriorityQueue { delete[] mSuggestedWords; } - void push(int score, unsigned short* word, int wordLength) { - SuggestedWord* sw = 0; + void push(int score, unsigned short *word, int wordLength) { + SuggestedWord *sw = 0; if (mSuggestions.size() >= MAX_WORDS) { sw = mSuggestions.top(); const int minScore = sw->mScore; @@ -86,21 +86,21 @@ class WordsPriorityQueue { } } - SuggestedWord* top() { + SuggestedWord *top() { if (mSuggestions.empty()) return 0; - SuggestedWord* sw = mSuggestions.top(); + SuggestedWord *sw = mSuggestions.top(); return sw; } - int outputSuggestions(const unsigned short* before, const int beforeLength, + int 
outputSuggestions(const unsigned short *before, const int beforeLength, int *frequencies, unsigned short *outputChars) { mHighestSuggestedWord = 0; const unsigned int size = min( MAX_WORDS, static_cast<unsigned int>(mSuggestions.size())); - SuggestedWord* swBuffer[size]; + SuggestedWord *swBuffer[size]; int index = size - 1; while (!mSuggestions.empty() && index >= 0) { - SuggestedWord* sw = mSuggestions.top(); + SuggestedWord *sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { AKLOGI("dump word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); @@ -110,11 +110,11 @@ class WordsPriorityQueue { --index; } if (size >= 2) { - SuggestedWord* nsMaxSw = 0; + SuggestedWord *nsMaxSw = 0; unsigned int maxIndex = 0; float maxNs = 0; for (unsigned int i = 0; i < size; ++i) { - SuggestedWord* tempSw = swBuffer[i]; + SuggestedWord *tempSw = swBuffer[i]; if (!tempSw) { continue; } @@ -131,13 +131,13 @@ class WordsPriorityQueue { } } for (unsigned int i = 0; i < size; ++i) { - SuggestedWord* sw = swBuffer[i]; + SuggestedWord *sw = swBuffer[i]; if (!sw) { AKLOGE("SuggestedWord is null %d", i); continue; } const unsigned int wordLength = sw->mWordLength; - char* targetAdr = (char*) outputChars + i * MAX_WORD_LENGTH * sizeof(short); + char *targetAdr = (char*) outputChars + i * MAX_WORD_LENGTH * sizeof(short); frequencies[i] = sw->mScore; memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short)); if (wordLength < MAX_WORD_LENGTH) { @@ -155,7 +155,7 @@ class WordsPriorityQueue { void clear() { mHighestSuggestedWord = 0; while (!mSuggestions.empty()) { - SuggestedWord* sw = mSuggestions.top(); + SuggestedWord *sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { AKLOGI("Clear word. 
%d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); @@ -172,8 +172,8 @@ class WordsPriorityQueue { DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength); } - float getHighestNormalizedScore(const unsigned short* before, const int beforeLength, - unsigned short** outWord, int *outScore, int *outLength) { + float getHighestNormalizedScore(const unsigned short *before, const int beforeLength, + unsigned short **outWord, int *outScore, int *outLength) { if (!mHighestSuggestedWord) { return 0.0; } @@ -182,13 +182,14 @@ class WordsPriorityQueue { } private: + DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue); struct wordComparator { bool operator ()(SuggestedWord * left, SuggestedWord * right) { return left->mScore > right->mScore; } }; - SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, + SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word, int wordLength) { for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { if (!mSuggestedWords[i].mUsed) { @@ -199,10 +200,10 @@ class WordsPriorityQueue { return 0; } - static float getNormalizedScore(SuggestedWord* sw, const unsigned short* before, - const int beforeLength, unsigned short** outWord, int *outScore, int *outLength) { + static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before, + const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) { const int score = sw->mScore; - unsigned short* word = sw->mWord; + unsigned short *word = sw->mWord; const int wordLength = sw->mWordLength; if (outScore) { *outScore = score; @@ -222,9 +223,8 @@ class WordsPriorityQueue { Suggestions mSuggestions; const unsigned int MAX_WORDS; const unsigned int MAX_WORD_LENGTH; - SuggestedWord* mSuggestedWords; - SuggestedWord* mHighestSuggestedWord; + SuggestedWord *mSuggestedWords; + SuggestedWord *mHighestSuggestedWord; }; -} - +} // namespace latinime #endif // LATINIME_WORDS_PRIORITY_QUEUE_H diff --git 
a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h index 210b5a848..aff307404 100644 --- a/native/jni/src/words_priority_queue_pool.h +++ b/native/jni/src/words_priority_queue_pool.h @@ -44,11 +44,11 @@ class WordsPriorityQueuePool { } } - WordsPriorityQueue* getMasterQueue() { + WordsPriorityQueue *getMasterQueue() { return mMasterQueue; } - WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { + WordsPriorityQueue *getSubQueue(const int wordIndex, const int inputWordLength) { if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { return 0; } @@ -70,7 +70,7 @@ class WordsPriorityQueuePool { inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { - WordsPriorityQueue* queue = getSubQueue(wordIndex, i); + WordsPriorityQueue *queue = getSubQueue(wordIndex, i); if (queue) { queue->clear(); } @@ -85,12 +85,12 @@ class WordsPriorityQueuePool { } private: - WordsPriorityQueue* mMasterQueue; - WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; + DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueuePool); + WordsPriorityQueue *mMasterQueue; + WordsPriorityQueue *mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; char mMasterQueueBuf[sizeof(WordsPriorityQueue)]; char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; }; -} - +} // namespace latinime #endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H |