aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/Android.mk61
-rw-r--r--native/jni/Android.mk87
-rw-r--r--native/jni/Application.mk1
-rw-r--r--native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp12
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp110
-rw-r--r--native/jni/jni_common.cpp16
-rw-r--r--native/jni/jni_common.h8
-rw-r--r--native/src/basechars.cpp (renamed from native/src/basechars.h)12
-rw-r--r--native/src/bigram_dictionary.cpp10
-rw-r--r--native/src/bigram_dictionary.h4
-rw-r--r--native/src/binary_format.h22
-rw-r--r--native/src/char_utils.h39
-rw-r--r--native/src/correction.cpp431
-rw-r--r--native/src/correction.h45
-rw-r--r--native/src/debug.h6
-rw-r--r--native/src/defines.h69
-rw-r--r--native/src/dictionary.cpp9
-rw-r--r--native/src/dictionary.h102
-rw-r--r--native/src/proximity_info.cpp22
-rw-r--r--native/src/proximity_info.h6
-rw-r--r--native/src/terminal_attributes.h78
-rw-r--r--native/src/unigram_dictionary.cpp498
-rw-r--r--native/src/unigram_dictionary.h92
-rw-r--r--native/src/words_priority_queue.h193
-rw-r--r--native/src/words_priority_queue_pool.h86
25 files changed, 1404 insertions, 615 deletions
diff --git a/native/Android.mk b/native/Android.mk
index cf1da4a41..5053e7d64 100644
--- a/native/Android.mk
+++ b/native/Android.mk
@@ -1,60 +1 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
-
-LOCAL_CFLAGS += -Werror -Wall
-
-# To suppress compiler warnings for unused variables/functions used for debug features etc.
-LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
-
-LOCAL_SRC_FILES := \
- jni/com_android_inputmethod_keyboard_ProximityInfo.cpp \
- jni/com_android_inputmethod_latin_BinaryDictionary.cpp \
- jni/jni_common.cpp \
- src/bigram_dictionary.cpp \
- src/char_utils.cpp \
- src/correction.cpp \
- src/dictionary.cpp \
- src/proximity_info.cpp \
- src/unigram_dictionary.cpp
-
-#FLAG_DBG := true
-#FLAG_DO_PROFILE := true
-
-TARGETING_UNBUNDLED_FROYO := true
-
-ifeq ($(TARGET_ARCH), x86)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(FLAG_DBG), true)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(FLAG_DO_PROFILE), true)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(TARGETING_UNBUNDLED_FROYO), true)
- LOCAL_NDK_VERSION := 4
- LOCAL_SDK_VERSION := 8
-endif
-
-LOCAL_MODULE := libjni_latinime
-
-LOCAL_MODULE_TAGS := optional
-
-ifeq ($(FLAG_DO_PROFILE), true)
- $(warning Making profiling version of native library)
- LOCAL_CFLAGS += -DFLAG_DO_PROFILE
- LOCAL_SHARED_LIBRARIES := libcutils libutils
-else # FLAG_DO_PROFILE
-ifeq ($(FLAG_DBG), true)
- $(warning Making debug version of native library)
- LOCAL_CFLAGS += -DFLAG_DBG
- LOCAL_SHARED_LIBRARIES := libcutils libutils
-endif # FLAG_DBG
-endif # FLAG_DO_PROFILE
-
-include $(BUILD_SHARED_LIBRARY)
+include $(call all-subdir-makefiles)
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
new file mode 100644
index 000000000..53bd21ddc
--- /dev/null
+++ b/native/jni/Android.mk
@@ -0,0 +1,87 @@
+# Copyright (C) 2011 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LATIN_IME_SRC_DIR := ../src
+
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
+
+LOCAL_CFLAGS += -Werror -Wall
+
+# To suppress compiler warnings for unused variables/functions used for debug features etc.
+LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
+
+LATIN_IME_JNI_SRC_FILES := \
+ com_android_inputmethod_keyboard_ProximityInfo.cpp \
+ com_android_inputmethod_latin_BinaryDictionary.cpp \
+ jni_common.cpp
+
+LATIN_IME_CORE_SRC_FILES := \
+ basechars.cpp \
+ bigram_dictionary.cpp \
+ char_utils.cpp \
+ correction.cpp \
+ dictionary.cpp \
+ proximity_info.cpp \
+ unigram_dictionary.cpp
+
+LOCAL_SRC_FILES := \
+ $(LATIN_IME_JNI_SRC_FILES) \
+ $(addprefix $(LATIN_IME_SRC_DIR)/,$(LATIN_IME_CORE_SRC_FILES))
+
+#FLAG_DBG := true
+#FLAG_DO_PROFILE := true
+
+TARGETING_UNBUNDLED_FROYO := true
+
+ifeq ($(TARGET_ARCH), x86)
+ TARGETING_UNBUNDLED_FROYO := false
+endif
+
+ifeq ($(FLAG_DBG), true)
+ TARGETING_UNBUNDLED_FROYO := false
+endif
+
+ifeq ($(FLAG_DO_PROFILE), true)
+ TARGETING_UNBUNDLED_FROYO := false
+endif
+
+ifeq ($(TARGETING_UNBUNDLED_FROYO), true)
+ LOCAL_NDK_VERSION := 4
+ LOCAL_SDK_VERSION := 8
+endif
+
+LOCAL_MODULE := libjni_latinime
+
+LOCAL_MODULE_TAGS := optional
+
+# For STL
+LOCAL_C_INCLUDES += external/stlport/stlport bionic
+LOCAL_SHARED_LIBRARIES += libstlport
+
+ifeq ($(FLAG_DO_PROFILE), true)
+ $(warning Making profiling version of native library)
+ LOCAL_CFLAGS += -DFLAG_DO_PROFILE
+ LOCAL_SHARED_LIBRARIES += libcutils libutils
+else # FLAG_DO_PROFILE
+ifeq ($(FLAG_DBG), true)
+ $(warning Making debug version of native library)
+ LOCAL_CFLAGS += -DFLAG_DBG
+ LOCAL_SHARED_LIBRARIES += libcutils libutils
+endif # FLAG_DBG
+endif # FLAG_DO_PROFILE
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/native/jni/Application.mk b/native/jni/Application.mk
new file mode 100644
index 000000000..caf3b2622
--- /dev/null
+++ b/native/jni/Application.mk
@@ -0,0 +1 @@
+APP_STL := stlport_static
diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
index 595ea2fdc..6e4fefd72 100644
--- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
+++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
@@ -28,14 +28,14 @@
namespace latinime {
-static jint latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
+static jlong latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
jint maxProximityCharsSize, jint displayWidth, jint displayHeight, jint gridWidth,
jint gridHeight, jintArray proximityCharsArray, jint keyCount,
jintArray keyXCoordinateArray, jintArray keyYCoordinateArray, jintArray keyWidthArray,
jintArray keyHeightArray, jintArray keyCharCodeArray,
jfloatArray sweetSpotCenterXArray, jfloatArray sweetSpotCenterYArray,
jfloatArray sweetSpotRadiusArray) {
- jint *proximityChars = env->GetIntArrayElements(proximityCharsArray, NULL);
+ jint *proximityChars = env->GetIntArrayElements(proximityCharsArray, 0);
jint *keyXCoordinates = safeGetIntArrayElements(env, keyXCoordinateArray);
jint *keyYCoordinates = safeGetIntArrayElements(env, keyYCoordinateArray);
jint *keyWidths = safeGetIntArrayElements(env, keyWidthArray);
@@ -59,19 +59,19 @@ static jint latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
safeReleaseIntArrayElements(env, keyYCoordinateArray, keyYCoordinates);
safeReleaseIntArrayElements(env, keyXCoordinateArray, keyXCoordinates);
env->ReleaseIntArrayElements(proximityCharsArray, proximityChars, 0);
- return (jint)proximityInfo;
+ return (jlong)proximityInfo;
}
-static void latinime_Keyboard_release(JNIEnv *env, jobject object, jint proximityInfo) {
+static void latinime_Keyboard_release(JNIEnv *env, jobject object, jlong proximityInfo) {
ProximityInfo *pi = (ProximityInfo*)proximityInfo;
if (!pi) return;
delete pi;
}
static JNINativeMethod sKeyboardMethods[] = {
- {"setProximityInfoNative", "(IIIII[II[I[I[I[I[I[F[F[F)I",
+ {"setProximityInfoNative", "(IIIII[II[I[I[I[I[I[F[F[F)J",
(void*)latinime_Keyboard_setProximityInfo},
- {"releaseProximityInfoNative", "(I)V", (void*)latinime_Keyboard_release}
+ {"releaseProximityInfoNative", "(J)V", (void*)latinime_Keyboard_release}
};
int register_ProximityInfo(JNIEnv *env) {
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 18c972444..f2878eea5 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -18,6 +18,7 @@
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "binary_format.h"
+#include "correction.h"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include "dictionary.h"
#include "jni.h"
@@ -33,6 +34,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <unistd.h>
#else // USE_MMAP_FOR_DICTIONARY
#include <stdlib.h>
#endif // USE_MMAP_FOR_DICTIONARY
@@ -41,75 +43,75 @@ namespace latinime {
void releaseDictBuf(void* dictBuf, const size_t length, int fd);
-static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
+static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
jstring sourceDir, jlong dictOffset, jlong dictSize,
jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords,
jint maxAlternatives) {
PROF_OPEN;
PROF_START(66);
- const char *sourceDirChars = env->GetStringUTFChars(sourceDir, NULL);
- if (sourceDirChars == NULL) {
- LOGE("DICT: Can't get sourceDir string");
+ const char *sourceDirChars = env->GetStringUTFChars(sourceDir, 0);
+ if (sourceDirChars == 0) {
+ AKLOGE("DICT: Can't get sourceDir string");
return 0;
}
int fd = 0;
- void *dictBuf = NULL;
+ void *dictBuf = 0;
int adjust = 0;
#ifdef USE_MMAP_FOR_DICTIONARY
/* mmap version */
fd = open(sourceDirChars, O_RDONLY);
if (fd < 0) {
- LOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
+ AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
return 0;
}
int pagesize = getpagesize();
adjust = dictOffset % pagesize;
int adjDictOffset = dictOffset - adjust;
int adjDictSize = dictSize + adjust;
- dictBuf = mmap(NULL, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
+ dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
- LOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
return 0;
}
dictBuf = (void *)((char *)dictBuf + adjust);
#else // USE_MMAP_FOR_DICTIONARY
/* malloc version */
- FILE *file = NULL;
+ FILE *file = 0;
file = fopen(sourceDirChars, "rb");
- if (file == NULL) {
- LOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
+ if (file == 0) {
+ AKLOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
return 0;
}
dictBuf = malloc(sizeof(char) * dictSize);
if (!dictBuf) {
- LOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno);
+ AKLOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno);
return 0;
}
int ret = fseek(file, (long)dictOffset, SEEK_SET);
if (ret != 0) {
- LOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno);
+ AKLOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno);
return 0;
}
ret = fread(dictBuf, sizeof(char) * dictSize, 1, file);
if (ret != 1) {
- LOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno);
+ AKLOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno);
return 0;
}
ret = fclose(file);
if (ret != 0) {
- LOGE("DICT: Failure in fclose. ret=%d errno=%d", ret, errno);
+ AKLOGE("DICT: Failure in fclose. ret=%d errno=%d", ret, errno);
return 0;
}
#endif // USE_MMAP_FOR_DICTIONARY
env->ReleaseStringUTFChars(sourceDir, sourceDirChars);
if (!dictBuf) {
- LOGE("DICT: dictBuf is null");
+ AKLOGE("DICT: dictBuf is null");
return 0;
}
- Dictionary *dictionary = NULL;
+ Dictionary *dictionary = 0;
if (BinaryFormat::UNKNOWN_FORMAT == BinaryFormat::detectFormat((uint8_t*)dictBuf)) {
- LOGE("DICT: dictionary format is unknown, bad magic number");
+ AKLOGE("DICT: dictionary format is unknown, bad magic number");
#ifdef USE_MMAP_FOR_DICTIONARY
releaseDictBuf(((char*)dictBuf) - adjust, adjDictSize, fd);
#else // USE_MMAP_FOR_DICTIONARY
@@ -121,23 +123,23 @@ static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
}
PROF_END(66);
PROF_CLOSE;
- return (jint)dictionary;
+ return (jlong)dictionary;
}
-static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jint dict,
- jint proximityInfo, jintArray xCoordinatesArray, jintArray yCoordinatesArray,
+static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jlong dict,
+ jlong proximityInfo, jintArray xCoordinatesArray, jintArray yCoordinatesArray,
jintArray inputArray, jint arraySize, jint flags,
jcharArray outputArray, jintArray frequencyArray) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return 0;
ProximityInfo *pInfo = (ProximityInfo*)proximityInfo;
- int *xCoordinates = env->GetIntArrayElements(xCoordinatesArray, NULL);
- int *yCoordinates = env->GetIntArrayElements(yCoordinatesArray, NULL);
+ int *xCoordinates = env->GetIntArrayElements(xCoordinatesArray, 0);
+ int *yCoordinates = env->GetIntArrayElements(yCoordinatesArray, 0);
- int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
- int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
- jchar *outputChars = env->GetCharArrayElements(outputArray, NULL);
+ int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
+ int *inputCodes = env->GetIntArrayElements(inputArray, 0);
+ jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
int count = dictionary->getSuggestions(pInfo, xCoordinates, yCoordinates, inputCodes,
arraySize, flags, (unsigned short*) outputChars, frequencies);
@@ -151,17 +153,17 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
return count;
}
-static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jint dict,
+static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict,
jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams,
jint maxAlternatives) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return 0;
- jchar *prevWord = env->GetCharArrayElements(prevWordArray, NULL);
- int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
- jchar *outputChars = env->GetCharArrayElements(outputArray, NULL);
- int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
+ jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0);
+ int *inputCodes = env->GetIntArrayElements(inputArray, 0);
+ jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
+ int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes,
inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams,
@@ -175,19 +177,42 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jin
return count;
}
-static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jint dict,
+static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
jcharArray wordArray, jint wordLength) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return (jboolean) false;
- jchar *word = env->GetCharArrayElements(wordArray, NULL);
+ jchar *word = env->GetCharArrayElements(wordArray, 0);
jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength);
env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
return result;
}
-static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jint dict) {
+static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
+ jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
+ jchar *beforeChars = env->GetCharArrayElements(before, 0);
+ jchar *afterChars = env->GetCharArrayElements(after, 0);
+ jdouble result = Correction::RankingAlgorithm::calcNormalizedScore(
+ (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength,
+ score);
+ env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
+ env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
+ return result;
+}
+
+static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
+ jcharArray before, jint beforeLength, jcharArray after, jint afterLength) {
+ jchar *beforeChars = env->GetCharArrayElements(before, 0);
+ jchar *afterChars = env->GetCharArrayElements(after, 0);
+ jint result = Correction::RankingAlgorithm::editDistance(
+ (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength);
+ env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
+ env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
+ return result;
+}
+
+static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return;
void *dictBuf = dictionary->getDict();
@@ -205,11 +230,11 @@ void releaseDictBuf(void* dictBuf, const size_t length, int fd) {
#ifdef USE_MMAP_FOR_DICTIONARY
int ret = munmap(dictBuf, length);
if (ret != 0) {
- LOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
}
ret = close(fd);
if (ret != 0) {
- LOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
}
#else // USE_MMAP_FOR_DICTIONARY
free(dictBuf);
@@ -217,11 +242,14 @@ void releaseDictBuf(void* dictBuf, const size_t length, int fd) {
}
static JNINativeMethod sMethods[] = {
- {"openNative", "(Ljava/lang/String;JJIIIII)I", (void*)latinime_BinaryDictionary_open},
- {"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
- {"getSuggestionsNative", "(II[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
- {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
- {"getBigramsNative", "(I[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}
+ {"openNative", "(Ljava/lang/String;JJIIIII)J", (void*)latinime_BinaryDictionary_open},
+ {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
+ {"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
+ {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
+ {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams},
+ {"calcNormalizedScoreNative", "([CI[CII)D",
+ (void*)latinime_BinaryDictionary_calcNormalizedScore},
+ {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
};
int register_BinaryDictionary(JNIEnv *env) {
diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp
index 8643f723f..85d26836a 100644
--- a/native/jni/jni_common.cpp
+++ b/native/jni/jni_common.cpp
@@ -32,22 +32,22 @@ using namespace latinime;
* Returns the JNI version on success, -1 on failure.
*/
jint JNI_OnLoad(JavaVM* vm, void* reserved) {
- JNIEnv* env = NULL;
+ JNIEnv* env = 0;
jint result = -1;
if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
- LOGE("ERROR: GetEnv failed");
+ AKLOGE("ERROR: GetEnv failed");
goto bail;
}
- assert(env != NULL);
+ assert(env != 0);
if (!register_BinaryDictionary(env)) {
- LOGE("ERROR: BinaryDictionary native registration failed");
+ AKLOGE("ERROR: BinaryDictionary native registration failed");
goto bail;
}
if (!register_ProximityInfo(env)) {
- LOGE("ERROR: ProximityInfo native registration failed");
+ AKLOGE("ERROR: ProximityInfo native registration failed");
goto bail;
}
@@ -63,12 +63,12 @@ namespace latinime {
int registerNativeMethods(JNIEnv* env, const char* className, JNINativeMethod* methods,
int numMethods) {
jclass clazz = env->FindClass(className);
- if (clazz == NULL) {
- LOGE("Native registration unable to find class '%s'", className);
+ if (clazz == 0) {
+ AKLOGE("Native registration unable to find class '%s'", className);
return JNI_FALSE;
}
if (env->RegisterNatives(clazz, methods, numMethods) < 0) {
- LOGE("RegisterNatives failed for '%s'", className);
+ AKLOGE("RegisterNatives failed for '%s'", className);
env->DeleteLocalRef(clazz);
return JNI_FALSE;
}
diff --git a/native/jni/jni_common.h b/native/jni/jni_common.h
index 9548e1b3f..6741443ac 100644
--- a/native/jni/jni_common.h
+++ b/native/jni/jni_common.h
@@ -29,17 +29,17 @@ int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *m
inline jint *safeGetIntArrayElements(JNIEnv *env, jintArray jArray) {
if (jArray) {
- return env->GetIntArrayElements(jArray, NULL);
+ return env->GetIntArrayElements(jArray, 0);
} else {
- return NULL;
+ return 0;
}
}
inline jfloat *safeGetFloatArrayElements(JNIEnv *env, jfloatArray jArray) {
if (jArray) {
- return env->GetFloatArrayElements(jArray, NULL);
+ return env->GetFloatArrayElements(jArray, 0);
} else {
- return NULL;
+ return 0;
}
}
diff --git a/native/src/basechars.h b/native/src/basechars.cpp
index 3843e11c5..31f1e18a8 100644
--- a/native/src/basechars.h
+++ b/native/src/basechars.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#ifndef LATINIME_BASECHARS_H
-#define LATINIME_BASECHARS_H
+#include "char_utils.h"
+
+namespace latinime {
/**
* Table mapping most combined Latin, Greek, and Cyrillic characters
@@ -23,7 +24,7 @@
* if c is not a combined character, or the base character if it
* is combined.
*/
-static unsigned short BASE_CHARS[] = {
+const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
@@ -189,4 +190,5 @@ static unsigned short BASE_CHARS[] = {
// generated with:
// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
-#endif // LATINIME_BASECHARS_H
+
+} // namespace latinime
diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp
index c340c6c1a..db7734bc7 100644
--- a/native/src/bigram_dictionary.cpp
+++ b/native/src/bigram_dictionary.cpp
@@ -32,8 +32,8 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion),
HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) {
if (DEBUG_DICT) {
- LOGI("BigramDictionary - constructor");
- LOGI("Has Bigram : %d", hasBigram);
+ AKLOGI("BigramDictionary - constructor");
+ AKLOGI("Has Bigram : %d", hasBigram);
}
}
@@ -46,7 +46,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
#ifdef FLAG_DBG
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
+ AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
#endif
}
@@ -60,7 +60,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
insertAt++;
}
if (DEBUG_DICT) {
- LOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
+ AKLOGI("Bigram: InsertAt -> %d maxBigrams: %d", insertAt, mMaxBigrams);
}
if (insertAt < mMaxBigrams) {
memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
@@ -76,7 +76,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
}
*dest = 0; // NULL terminate
if (DEBUG_DICT) {
- LOGI("Bigram: Added word at %d", insertAt);
+ AKLOGI("Bigram: Added word at %d", insertAt);
}
return true;
}
diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h
index c07458a38..585a1866a 100644
--- a/native/src/bigram_dictionary.h
+++ b/native/src/bigram_dictionary.h
@@ -21,14 +21,14 @@ namespace latinime {
class Dictionary;
class BigramDictionary {
-public:
+ public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives,
const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
~BigramDictionary();
-private:
+ private:
bool addWordBigram(unsigned short *word, int length, int frequency);
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index 6f65088db..1d74998f6 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -22,12 +22,12 @@
namespace latinime {
class BinaryFormat {
-private:
+ private:
const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-public:
+ public:
const static int UNKNOWN_FORMAT = -1;
const static int FORMAT_VERSION_1 = 1;
const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1;
@@ -61,7 +61,9 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) {
}
inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) {
- return dict[(*pos)++];
+ const int msb = dict[(*pos)++];
+ if (msb < 0x80) return msb;
+ return ((msb & 0x7F) << 8) | dict[(*pos)++];
}
inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) {
@@ -145,15 +147,15 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
const int pos) {
- // This function skips all attributes. The format makes provision for future extension
- // with other attributes (notably shortcuts) but for the time being, bigrams are the
- // only attributes that may be found in a character group, so we only look at bigrams
- // in this version.
+ // This function skips all attributes: shortcuts and bigrams.
+ int newPos = pos;
+ if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
+ newPos = skipAttributes(dict, newPos);
+ }
if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
- return skipAttributes(dict, pos);
- } else {
- return pos;
+ newPos = skipAttributes(dict, newPos);
}
+ return newPos;
}
inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
diff --git a/native/src/char_utils.h b/native/src/char_utils.h
index a69a35e7a..607dc5195 100644
--- a/native/src/char_utils.h
+++ b/native/src/char_utils.h
@@ -19,8 +19,47 @@
namespace latinime {
+inline static int isAsciiUpper(unsigned short c) {
+ return c >= 'A' && c <= 'Z';
+}
+
+inline static unsigned short toAsciiLower(unsigned short c) {
+ return c - 'A' + 'a';
+}
+
+inline static int isAscii(unsigned short c) {
+ return c <= 127;
+}
+
unsigned short latin_tolower(unsigned short c);
+/**
+ * Table mapping most combined Latin, Greek, and Cyrillic characters
+ * to their base characters. If c is in range, BASE_CHARS[c] == c
+ * if c is not a combined character, or the base character if it
+ * is combined.
+ */
+
+static const int BASE_CHARS_SIZE = 0x0500;
+extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+
+inline static unsigned short toBaseChar(unsigned short c) {
+ if (c < BASE_CHARS_SIZE) {
+ return BASE_CHARS[c];
+ }
+ return c;
+}
+
+inline static unsigned short toBaseLowerCase(unsigned short c) {
+ c = toBaseChar(c);
+ if (isAsciiUpper(c)) {
+ return toAsciiLower(c);
+ } else if (isAscii(c)) {
+ return c;
+ }
+ return latin_tolower(c);
+}
+
} // namespace latinime
#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 27dc40745..63dd283c8 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -16,11 +16,13 @@
#include <assert.h>
#include <ctype.h>
+#include <math.h>
#include <stdio.h>
#include <string.h>
#define LOG_TAG "LatinIME: correction.cpp"
+#include "char_utils.h"
#include "correction.h"
#include "dictionary.h"
#include "proximity_info.h"
@@ -31,81 +33,60 @@ namespace latinime {
// edit distance funcitons //
/////////////////////////////
-#if 0 /* no longer used */
-inline static int editDistance(
- int* editDistanceTable, const unsigned short* input,
- const int inputLength, const unsigned short* output, const int outputLength) {
- // dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int* dp = editDistanceTable;
- const int li = inputLength + 1;
- const int lo = outputLength + 1;
- for (int i = 0; i < li; ++i) {
- dp[lo * i] = i;
- }
- for (int i = 0; i < lo; ++i) {
- dp[i] = i;
- }
-
- for (int i = 0; i < li - 1; ++i) {
- for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
- const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
- const uint16_t cost = (ci == co) ? 0 : 1;
- dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
- min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
- if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1])
- && co == Dictionary::toBaseLowerCase(input[i - 1])) {
- dp[(i + 1) * lo + (j + 1)] = min(
- dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
- }
- }
+inline static void initEditDistance(int *editDistanceTable) {
+ for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
+ editDistanceTable[i] = i;
}
+}
- if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", inputLength, outputLength);
- for (int i = 0; i < li; ++i) {
- for (int j = 0; j < lo; ++j) {
- LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
+ const int editDistanceTableWidth, const int outputLength) {
+ if (DEBUG_DICT) {
+ AKLOGI("EditDistanceTable");
+ for (int i = 0; i <= 10; ++i) {
+ int c[11];
+ for (int j = 0; j <= 10; ++j) {
+ if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
+ c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
+ } else {
+ c[j] = -1;
+ }
}
+ AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+ c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
- return dp[li * lo - 1];
-}
-#endif
-
-inline static void initEditDistance(int *editDistanceTable) {
- for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
- editDistanceTable[i] = i;
- }
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputLength, const unsigned short *output, const int outputLength) {
+ // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
// and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
int *const current = editDistanceTable + outputLength * (inputLength + 1);
const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : NULL;
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
current[0] = outputLength;
- const uint32_t co = Dictionary::toBaseLowerCase(output[outputLength - 1]);
- const uint32_t prevCO =
- outputLength >= 2 ? Dictionary::toBaseLowerCase(output[outputLength - 2]) : 0;
+ const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
+ const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
for (int i = 1; i <= inputLength; ++i) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i - 1]);
+ const uint32_t ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
- if (i >= 2 && prevprev && ci == prevCO
- && co == Dictionary::toBaseLowerCase(input[i - 2])) {
+ if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
current[i] = min(current[i], prevprev[i - 2] + 1);
}
}
}
-inline static int getCurrentEditDistance(
- int *editDistanceTable, const int inputLength, const int outputLength) {
- return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
+inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
+ const int outputLength, const int inputLength) {
+ if (DEBUG_EDIT_DISTANCE) {
+ AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ }
+ return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength];
}
//////////////////////
@@ -133,6 +114,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
mInputLength = inputLength;
mMaxDepth = maxDepth;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+ // TODO: This is not supposed to be required. Check what's going wrong with
+ // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
+ initEditDistance(mEditDistanceTable);
}
void Correction::initCorrectionState(
@@ -146,7 +130,7 @@ void Correction::initCorrectionState(
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
- const bool useFullEditDistance) {
+ const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
// TODO: remove
mTransposedPos = transposedPos;
mExcessivePos = excessivePos;
@@ -159,6 +143,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mSpaceProximityPos = spaceProximityPos;
mMissingSpacePos = missingSpacePos;
mUseFullEditDistance = useFullEditDistance;
+ mDoAutoCompletion = doAutoCompletion;
+ mMaxErrors = maxErrors;
}
void Correction::checkState() {
@@ -179,16 +165,27 @@ int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFre
}
int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
+ return getFinalFreqInternal(freq, word, wordLength, mInputLength);
+}
+
+int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength,
+ const int inputLength) {
+ return getFinalFreqInternal(freq, word, wordLength, inputLength);
+}
+
+int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength,
+ const int inputLength) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
- if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
- return -1;
+ if (outputIndex < MIN_SUGGEST_DEPTH) {
+ return NOT_A_FREQUENCY;
}
*word = mWord;
- return Correction::RankingAlgorithm::calculateFinalFreq(
- inputIndex, outputIndex, freq, mEditDistanceTable, this);
+ int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq(
+ inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength);
+ return finalFreq;
}
bool Correction::initProcessState(const int outputIndex) {
@@ -229,20 +226,10 @@ int Correction::goDownTree(
}
// TODO: remove
-int Correction::getOutputIndex() {
- return mOutputIndex;
-}
-
-// TODO: remove
int Correction::getInputIndex() {
return mInputIndex;
}
-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
- return mNeedsToTraverseAllNodes;
-}
-
void Correction::incrementInputIndex() {
++mInputIndex;
}
@@ -280,7 +267,9 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const {
// TODO: use edit distance here
- return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
+ return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
+ // Allow one char longer word for missing character
+ || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength));
}
void Correction::addCharToCurrentWord(const int32_t c) {
@@ -290,13 +279,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
mWord, mOutputIndex + 1);
}
-// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
addCharToCurrentWord(c);
- if (needsToTraverseAllNodes() && isTerminal) {
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+ mTerminalOutputIndex = mOutputIndex;
+ if (mNeedsToTraverseAllNodes && isTerminal) {
incrementOutputIndex();
return TRAVERSE_ALL_ON_TERMINAL;
} else {
@@ -305,6 +293,13 @@ Correction::CorrectionType Correction::processSkipChar(
}
}
+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex;
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED;
+}
+
inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR;
}
@@ -312,12 +307,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
+ if (correctionCount > mMaxErrors) {
+ return processUnrelatedCorrectionType();
+ }
+
// TODO: Change the limit if we'll allow two or more corrections
const bool noCorrectionsHappenedSoFar = correctionCount == 0;
const bool canTryCorrection = noCorrectionsHappenedSoFar;
int proximityIndex = 0;
mDistances[mOutputIndex] = NOT_A_DISTANCE;
+ // Skip checking this node
if (mNeedsToTraverseAllNodes || isQuote(c)) {
bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
@@ -342,6 +342,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
return processSkipChar(c, isTerminal, incremented);
}
+ // Check possible corrections.
if (mExcessivePos >= 0) {
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
mExcessivePos = mOutputIndex;
@@ -384,15 +385,20 @@ Correction::CorrectionType Correction::processCharAndCalcState(
--mTransposedCount;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
}
// TODO: Change the limit if we'll allow two or more proximity chars with corrections
- const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0;
+ // Work around: When the mMaxErrors is 1, we only allow just one error
+ // including proximity correction.
+ const bool checkProximityChars = (mMaxErrors > 1)
+ ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
+ : (noCorrectionsHappenedSoFar && mProximityCount == 0);
+
ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
? ProximityInfo::EQUIVALENT_CHAR
: mProximityInfo->getMatchedProximityId(
@@ -405,7 +411,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& isEquivalentChar(mProximityInfo->getMatchedProximityId(
mInputIndex, mWord[mOutputIndex - 1], false))) {
if (DEBUG_CORRECTION) {
- LOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
+ AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
}
// Conversion p->e
// Example:
@@ -482,10 +488,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} else {
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
- return UNRELATED;
+ return processUnrelatedCorrectionType();
}
} else if (secondTransposing) {
// If inputIndex is greater than mInputLength, that means there is no
@@ -535,11 +541,13 @@ Correction::CorrectionType Correction::processCharAndCalcState(
mTerminalOutputIndex = mOutputIndex - 1;
if (DEBUG_CORRECTION) {
DUMP_WORD(mWord, mOutputIndex);
- LOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
+ AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
mTransposedCount, mExcessiveCount, c);
}
return ON_TERMINAL;
} else {
+ mTerminalInputIndex = mInputIndex - 1;
+ mTerminalOutputIndex = mOutputIndex - 1;
return NOT_ON_TERMINAL;
}
}
@@ -607,13 +615,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
}
inline static bool isUpperCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (isupper(c)) {
- return true;
- }
- return false;
+ return isAsciiUpper(toBaseChar(c));
}
//////////////////////
@@ -622,9 +624,9 @@ inline static bool isUpperCase(unsigned short c) {
/* static */
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
- const int freq, int* editDistanceTable, const Correction* correction) {
+ const int freq, int* editDistanceTable, const Correction* correction,
+ const int inputLength) {
const int excessivePos = correction->getExcessivePos();
- const int inputLength = correction->mInputLength;
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
const ProximityInfo *proximityInfo = correction->mProximityInfo;
@@ -649,45 +651,50 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
const unsigned short* word = correction->mWord;
const bool skipped = skippedCount > 0;
- const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
+ const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
+ if (DEBUG_DICT_FULL) {
+ dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength);
+ }
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
// TODO: Optimize this.
- // TODO: Ignoring edit distance for transposed char, for now
- if (transposedCount == 0 && (proximityMatchedCount > 0 || skipped || excessiveCount > 0)) {
- ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1);
+ if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
+ ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
+ inputLength) - transposedCount;
+
const int matchWeight = powerIntCapped(typedLetterMultiplier,
- max(inputLength, outputIndex + 1) - ed);
+ max(inputLength, outputLength) - ed);
multiplyIntCapped(matchWeight, &finalFreq);
// TODO: Demote further if there are two or more excessive chars with longer user input?
- if (inputLength > outputIndex + 1) {
+ if (inputLength > outputLength) {
multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
}
ed = max(0, ed - quoteDiffCount);
- if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) {
- // Promote a word with just one skipped or excessive char
- if (sameLength) {
- multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
- } else {
+ if (transposedCount < 1) {
+ if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
+ // Promote a word with just one skipped or excessive char
+ if (sameLength) {
+ multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq);
+ } else {
+ multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ }
+ } else if (ed == 0) {
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
+ sameLength = true;
}
- } else if (ed == 0) {
- multiplyIntCapped(typedLetterMultiplier, &finalFreq);
- sameLength = true;
}
- adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
+ adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
proximityMatchedCount);
} else {
- // TODO: Calculate the edit distance for transposed char
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq);
}
@@ -707,7 +714,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
/ (10 * inputLength
- WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
if (DEBUG_DICT_FULL) {
- LOGI("Demotion rate for missing character is %d.", demotionRate);
+ AKLOGI("Demotion rate for missing character is %d.", demotionRate);
}
multiplyRate(demotionRate, &finalFreq);
}
@@ -721,7 +728,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!lastCharExceeded && !proximityInfo->existsAdjacentProximityChars(excessivePos)) {
if (DEBUG_CORRECTION_FREQ) {
- LOGI("Double excessive demotion");
+ AKLOGI("Double excessive demotion");
}
// If an excessive character is not adjacent to the left char or the right char,
// we will demote this word.
@@ -771,7 +778,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
// A word with proximity corrections
if (DEBUG_DICT_FULL) {
- LOGI("Found a proximity correction.");
+ AKLOGI("Found a proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
@@ -787,7 +794,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
// Promotion for an exactly matched word
if (ed == 0) {
// Full exact match
- if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) {
+ if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
+ && quoteDiffCount == 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
}
}
@@ -832,14 +840,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
}
if (DEBUG_DICT_FULL) {
- LOGI("calc: %d, %d", outputIndex, sameLength);
+ AKLOGI("calc: %d, %d", outputLength, sameLength);
}
if (DEBUG_CORRECTION_FREQ) {
- DUMP_WORD(correction->mWord, outputIndex + 1);
- LOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount,
- skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength,
- quoteDiffCount, ed, finalFreq);
+ DUMP_WORD(correction->mWord, outputLength);
+ AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
+ skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded,
+ sameLength, quoteDiffCount, ed, finalFreq);
}
return finalFreq;
@@ -878,7 +886,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
if (DEBUG_DICT_FULL) {
- LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
+ }
+
+ if (firstWordLength == 0 || secondWordLength == 0) {
+ return 0;
+ }
+ const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1);
+ int tempFirstFreq = firstFreq;
+ multiplyRate(firstDemotionRate, &tempFirstFreq);
+
+ const int secondDemotionRate = 100
+ - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1);
+ int tempSecondFreq = secondFreq;
+ multiplyRate(secondDemotionRate, &tempSecondFreq);
+
+ const int totalLength = firstWordLength + secondWordLength;
+
+ // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
+ // length.
+ int totalFreq = tempFirstFreq + tempSecondFreq;
+
+ // This is a workaround to try offsetting the not-enough-demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
+ // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
+ // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
+ const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
+ multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
+
+ // At this moment, totalFreq is calculated by the following formula:
+ // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
+ // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
+
+ multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
+
+ // This is another workaround to offset the demotion which will be done in
+ // calcNormalizedScore in Utils.java.
+ // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
+ // the same amount because we already have adjusted the synthetic freq of this "missing or
+ // mistyped space" suggestion candidate above in this method.
+ const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
+ multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
+
+ if (isSpaceProximity) {
+ // A word pair with one space proximity correction
+ if (DEBUG_DICT) {
+ AKLOGI("Found a word pair with space proximity correction.");
+ }
+ multiplyIntCapped(typedLetterMultiplier, &totalFreq);
+ multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
+ }
+
+ multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
+
+ if (capitalizedWordDemotion) {
+ multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
+ }
+
+ return totalFreq;
+}
+
+/* static */
+int Correction::RankingAlgorithm::calcFreqForSplitTwoWordsOld(
+ const int firstFreq, const int secondFreq, const Correction* correction,
+ const unsigned short *word) {
+ const int spaceProximityPos = correction->mSpaceProximityPos;
+ const int missingSpacePos = correction->mMissingSpacePos;
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+ const int inputLength = correction->mInputLength;
+ const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
+
+ bool firstCapitalizedWordDemotion = false;
+ if (firstWordLength >= 2) {
+ firstCapitalizedWordDemotion = isUpperCase(word[0]);
+ }
+
+ bool secondCapitalizedWordDemotion = false;
+ if (secondWordLength >= 2) {
+ secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
+ }
+
+ const bool capitalizedWordDemotion =
+ firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
+
+ if (DEBUG_DICT_FULL) {
+ AKLOGI("Two words: %c, %c, %d",
+ word[0], word[firstWordLength + 1], capitalizedWordDemotion);
}
if (firstWordLength == 0 || secondWordLength == 0) {
@@ -923,7 +1027,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
if (isSpaceProximity) {
// A word pair with one space proximity correction
if (DEBUG_DICT) {
- LOGI("Found a word pair with space proximity correction.");
+ AKLOGI("Found a word pair with space proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &totalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
@@ -938,4 +1042,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
return totalFreq;
}
+/* Damerau-Levenshtein distance */
+inline static int editDistanceInternal(
+ int* editDistanceTable, const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ // dp[li][lo] dp[a][b] = dp[ a * lo + b]
+ int* dp = editDistanceTable;
+ const int li = beforeLength + 1;
+ const int lo = afterLength + 1;
+ for (int i = 0; i < li; ++i) {
+ dp[lo * i] = i;
+ }
+ for (int i = 0; i < lo; ++i) {
+ dp[i] = i;
+ }
+
+ for (int i = 0; i < li - 1; ++i) {
+ for (int j = 0; j < lo - 1; ++j) {
+ const uint32_t ci = toBaseLowerCase(before[i]);
+ const uint32_t co = toBaseLowerCase(after[j]);
+ const uint16_t cost = (ci == co) ? 0 : 1;
+ dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+ min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+ if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
+ && co == toBaseLowerCase(before[i - 1])) {
+ dp[(i + 1) * lo + (j + 1)] = min(
+ dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+ }
+ }
+ }
+
+ if (DEBUG_EDIT_DISTANCE) {
+ AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ for (int i = 0; i < li; ++i) {
+ for (int j = 0; j < lo; ++j) {
+ AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+ }
+ }
+ }
+ return dp[li * lo - 1];
+}
+
+int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ int table[(beforeLength + 1) * (afterLength + 1)];
+ return editDistanceInternal(table, before, beforeLength, after, afterLength);
+}
+
+
+// In dictionary.cpp, getSuggestion() method,
+// suggestion scores are computed using the below formula.
+// original score
+// := pow(mTypedLetterMultiplier (this is defined 2),
+// (the number of matched characters between typed word and suggested word))
+// * (individual word's score which defined in the unigram dictionary,
+// and this score is defined in range [0, 255].)
+// Then, the following processing is applied.
+// - If the dictionary word is matched up to the point of the user entry
+// (full match up to min(before.length(), after.length())
+// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
+// - If the word is a true full match except for differences in accents or
+// capitalization, then treat it as if the score was 255.
+// - If before.length() == after.length()
+// => multiply by mFullWordMultiplier (this is defined 2))
+// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+
+/* static */
+double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength,
+ const int score) {
+ if (0 == beforeLength || 0 == afterLength) {
+ return 0;
+ }
+ const int distance = editDistance(before, beforeLength, after, afterLength);
+ int spaceCount = 0;
+ for (int i = 0; i < afterLength; ++i) {
+ if (after[i] == CODE_SPACE) {
+ ++spaceCount;
+ }
+ }
+
+ if (spaceCount == afterLength) {
+ return 0;
+ }
+
+ const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
+ * pow((double)TYPED_LETTER_MULTIPLIER,
+ (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+
+ // add a weight based on edit distance.
+ // distance <= max(afterLength, beforeLength) == afterLength,
+ // so, 0 <= distance / afterLength <= 1
+ const double weight = 1.0 - (double) distance / afterLength;
+ return (score / maxScore) * weight;
+}
+
} // namespace latinime
diff --git a/native/src/correction.h b/native/src/correction.h
index d4e99f0ce..0715551d0 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -27,8 +27,7 @@ namespace latinime {
class ProximityInfo;
class Correction {
-
-public:
+ public:
typedef enum {
TRAVERSE_ALL_ON_TERMINAL,
TRAVERSE_ALL_NOT_ON_TERMINAL,
@@ -44,11 +43,11 @@ public:
// TODO: remove
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
- const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance);
+ const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
+ const bool doAutoCompletion, const int maxErrors);
void checkState();
bool initProcessState(const int index);
- int getOutputIndex();
int getInputIndex();
virtual ~Correction();
@@ -76,6 +75,8 @@ public:
int getFreqForSplitTwoWords(
const int firstFreq, const int secondFreq, const unsigned short *word);
int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
+ int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
+ const int inputLength);
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
@@ -94,21 +95,45 @@ public:
inline int getTreeParentIndex(const int index) const {
return mCorrectionStates[index].mParentIndex;
}
-private:
+
+ class RankingAlgorithm {
+ public:
+ static int calculateFinalFreq(const int inputIndex, const int depth,
+ const int freq, int *editDistanceTable, const Correction* correction,
+ const int inputLength);
+ static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
+ static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
+ static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
+ const unsigned short* after, const int afterLength, const int score);
+ static int editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength);
+ private:
+ static const int CODE_SPACE = ' ';
+ static const int MAX_INITIAL_SCORE = 255;
+ static const int TYPED_LETTER_MULTIPLIER = 2;
+ static const int FULL_WORD_MULTIPLIER = 2;
+ };
+
+ private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
- inline bool needsToTraverseAllNodes();
inline void startToTraverseAllNodes();
inline bool isQuote(const unsigned short c);
inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+ inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c);
+ inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength,
+ const int inputLength);
const int TYPED_LETTER_MULTIPLIER;
const int FULL_WORD_MULTIPLIER;
const ProximityInfo *mProximityInfo;
bool mUseFullEditDistance;
+ bool mDoAutoCompletion;
int mMaxEditDistance;
int mMaxDepth;
int mInputLength;
@@ -116,6 +141,7 @@ private:
int mMissingSpacePos;
int mTerminalInputIndex;
int mTerminalOutputIndex;
+ int mMaxErrors;
// The following arrays are state buffer.
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
@@ -150,13 +176,6 @@ private:
bool mTransposing;
bool mSkipping;
- class RankingAlgorithm {
- public:
- static int calculateFinalFreq(const int inputIndex, const int depth,
- const int freq, int *editDistanceTable, const Correction* correction);
- static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
- const Correction* correction, const unsigned short *word);
- };
};
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/native/src/debug.h b/native/src/debug.h
index 38b2f107a..b13052c95 100644
--- a/native/src/debug.h
+++ b/native/src/debug.h
@@ -42,7 +42,7 @@ static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned s
static inline void LOGI_S16(unsigned short* string, const unsigned int length) {
unsigned char tmp_buffer[length];
convertToUnibyteString(string, tmp_buffer, length);
- LOGI(">> %s", tmp_buffer);
+ AKLOGI(">> %s", tmp_buffer);
// The log facility is throwing out log that comes too fast. The following
// is a dirty way of slowing down processing so that we can see all log.
// TODO : refactor this in a blocking log or something.
@@ -53,7 +53,7 @@ static inline void LOGI_S16_PLUS(unsigned short* string, const unsigned int leng
unsigned char c) {
unsigned char tmp_buffer[length+1];
convertToUnibyteStringAndReplaceLastChar(string, tmp_buffer, length, c);
- LOGI(">> %s", tmp_buffer);
+ AKLOGI(">> %s", tmp_buffer);
// Likewise
// usleep(10);
}
@@ -64,7 +64,7 @@ static inline void printDebug(const char* tag, int* codes, int codesSize, int MA
buf[codesSize] = 0;
while (--codesSize >= 0)
buf[codesSize] = (unsigned char)codes[codesSize * MAX_PROXIMITY_CHARS];
- LOGI("%s, WORD = %s", tag, buf);
+ AKLOGI("%s, WORD = %s", tag, buf);
free(buf);
}
diff --git a/native/src/defines.h b/native/src/defines.h
index ef1beb92f..119a7d779 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -20,15 +20,32 @@
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <cutils/log.h>
+#define AKLOGE ALOGE
+#define AKLOGI ALOGI
+
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
+
+static char charBuf[50];
+
+static void dumpWord(const unsigned short* word, const int length) {
+ for (int i = 0; i < length; ++i) {
+ charBuf[i] = word[i];
+ }
+ charBuf[length] = 0;
+ AKLOGI("[ %s ]", charBuf);
+}
+
#else
-#define LOGE(fmt, ...)
-#define LOGI(fmt, ...)
+#define AKLOGE(fmt, ...)
+#define AKLOGI(fmt, ...)
+#define DUMP_WORD(word, length)
#endif
#ifdef FLAG_DO_PROFILE
// Profiler
#include <cutils/log.h>
#include <time.h>
+
#define PROF_BUF_SIZE 100
static double profile_buf[PROF_BUF_SIZE];
static double profile_old[PROF_BUF_SIZE];
@@ -42,8 +59,8 @@ static unsigned int profile_counter[PROF_BUF_SIZE];
#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0)
#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
#define PROF_CLOCKOUT(prof_buf_id) \
- LOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
-#define PROF_OUTALL do { LOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
+ AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
+#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
static void prof_reset(void) {
for (int i = 0; i < PROF_BUF_SIZE; ++i) {
@@ -55,9 +72,9 @@ static void prof_reset(void) {
static void prof_out(void) {
if (profile_counter[PROF_BUF_SIZE - 1] != 1) {
- LOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
+ AKLOGI("Error: You must call PROF_OPEN before PROF_CLOSE.");
}
- LOGI("Total time is %6.3f ms.",
+ AKLOGI("Total time is %6.3f ms.",
profile_buf[PROF_BUF_SIZE - 1] * 1000 / (double)CLOCKS_PER_SEC);
double all = 0;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
@@ -66,7 +83,7 @@ static void prof_out(void) {
if (all == 0) all = 1;
for (int i = 0; i < PROF_BUF_SIZE - 1; ++i) {
if (profile_buf[i] != 0) {
- LOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
+ AKLOGI("(%d): Used %4.2f%%, %8.4f ms. Called %d times.",
i, (profile_buf[i] * 100 / all),
profile_buf[i] * 1000 / (double)CLOCKS_PER_SEC, profile_counter[i]);
}
@@ -98,21 +115,10 @@ static void prof_out(void) {
#define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL
-#define DEBUG_PROXIMITY_INFO true
+#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
-#define DEBUG_CORRECTION_FREQ true
-
-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
-
-static char charBuf[50];
-
-static void dumpWord(const unsigned short* word, const int length) {
- for (int i = 0; i < length; ++i) {
- charBuf[i] = word[i];
- }
- charBuf[length] = 0;
- LOGI("[ %s ]", charBuf);
-}
+#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_WORDS_PRIORITY_QUEUE false
#else // FLAG_DBG
@@ -125,8 +131,8 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_WORDS_PRIORITY_QUEUE false
-#define DUMP_WORD(word, length)
#endif // FLAG_DBG
@@ -166,6 +172,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
#define NOT_A_INDEX -1
+#define NOT_A_FREQUENCY -1
#define KEYCODE_SPACE ' '
@@ -180,10 +187,10 @@ static void dumpWord(const unsigned short* word, const int length) {
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
-#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67
+#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
-#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
+#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
@@ -192,14 +199,26 @@ static void dumpWord(const unsigned short* word, const int length) {
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
+#define TWO_WORDS_CORRECTION_DEMOTION_BASE 80
+#define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1
#define ZERO_DISTANCE_PROMOTION_RATE 110
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
-// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
+// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
#define MAX_WORD_LENGTH_INTERNAL 48
+// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
+// for better performance.
+// Holds up to 1 candidate for each word
+#define SUB_QUEUE_MAX_WORDS 1
+#define SUB_QUEUE_MAX_COUNT 10
+#define SUB_QUEUE_MIN_WORD_LENGTH 4
+
+#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
+#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22
+
#define MAX_DEPTH_MULTIPLIER 3
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index a49769bdb..822c2151d 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -33,11 +33,14 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) {
if (DEBUG_DICT) {
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
- LOGI("Max word length (%d) is greater than %d",
+ AKLOGI("Max word length (%d) is greater than %d",
maxWordLength, MAX_WORD_LENGTH_INTERNAL);
- LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
+ AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
}
}
+ mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
+ mWordsPriorityQueuePool = new WordsPriorityQueuePool(
+ maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives,
@@ -45,6 +48,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
}
Dictionary::~Dictionary() {
+ delete mCorrection;
+ delete mWordsPriorityQueuePool;
delete mUnigramDictionary;
delete mBigramDictionary;
}
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index d5de0083a..90d7148d5 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -17,22 +17,25 @@
#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H
-#include "basechars.h"
#include "bigram_dictionary.h"
#include "char_utils.h"
+#include "correction.h"
#include "defines.h"
#include "proximity_info.h"
#include "unigram_dictionary.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
class Dictionary {
-public:
+ public:
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
+
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) {
- return mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
+ mCorrection, xcoordinates, ycoordinates, codes,
codesSize, flags, outWords, frequencies);
}
@@ -53,19 +56,9 @@ public:
// public static utility methods
// static inline methods should be defined in the header file
- static unsigned short getChar(const unsigned char *dict, int *pos);
- static int getCount(const unsigned char *dict, int *pos);
- static bool getTerminal(const unsigned char *dict, int *pos);
- static int getAddress(const unsigned char *dict, int *pos);
- static int getFreq(const unsigned char *dict, const bool isLatestDictVersion, int *pos);
static int wideStrLen(unsigned short *str);
- // returns next sibling's position
- static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
- const int pos, unsigned short *c, int *childrenPosition,
- bool *terminal, int *freq);
- static inline unsigned short toBaseLowerCase(unsigned short c);
-private:
+ private:
bool hasBigram();
const unsigned char *mDict;
@@ -79,60 +72,12 @@ private:
const bool IS_LATEST_DICT_VERSION;
UnigramDictionary *mUnigramDictionary;
BigramDictionary *mBigramDictionary;
+ WordsPriorityQueuePool *mWordsPriorityQueuePool;
+ Correction *mCorrection;
};
// public static utility methods
// static inline methods should be defined in the header file
-inline unsigned short Dictionary::getChar(const unsigned char *dict, int *pos) {
- unsigned short ch = (unsigned short) (dict[(*pos)++] & 0xFF);
- // If the code is 255, then actual 16 bit code follows (in big endian)
- if (ch == 0xFF) {
- ch = ((dict[*pos] & 0xFF) << 8) | (dict[*pos + 1] & 0xFF);
- (*pos) += 2;
- }
- return ch;
-}
-
-inline int Dictionary::getCount(const unsigned char *dict, int *pos) {
- return dict[(*pos)++] & 0xFF;
-}
-
-inline bool Dictionary::getTerminal(const unsigned char *dict, int *pos) {
- return (dict[*pos] & FLAG_TERMINAL_MASK) > 0;
-}
-
-inline int Dictionary::getAddress(const unsigned char *dict, int *pos) {
- int address = 0;
- if ((dict[*pos] & FLAG_ADDRESS_MASK) == 0) {
- *pos += 1;
- } else {
- address += (dict[*pos] & (ADDRESS_MASK >> 16)) << 16;
- address += (dict[*pos + 1] & 0xFF) << 8;
- address += (dict[*pos + 2] & 0xFF);
- *pos += 3;
- }
- return address;
-}
-
-inline int Dictionary::getFreq(const unsigned char *dict,
- const bool isLatestDictVersion, int *pos) {
- int freq = dict[(*pos)++] & 0xFF;
- if (isLatestDictVersion) {
- // skipping bigram
- int bigramExist = (dict[*pos] & FLAG_BIGRAM_READ);
- if (bigramExist > 0) {
- int nextBigramExist = 1;
- while (nextBigramExist > 0) {
- (*pos) += 3;
- nextBigramExist = (dict[(*pos)++] & FLAG_BIGRAM_CONTINUED);
- }
- } else {
- (*pos)++;
- }
- }
- return freq;
-}
-
inline int Dictionary::wideStrLen(unsigned short *str) {
if (!str) return 0;
unsigned short *end = str;
@@ -140,35 +85,6 @@ inline int Dictionary::wideStrLen(unsigned short *str) {
end++;
return end - str;
}
-
-inline int Dictionary::setDictionaryValues(const unsigned char *dict,
- const bool isLatestDictVersion, const int pos, unsigned short *c,int *childrenPosition,
- bool *terminal, int *freq) {
- int position = pos;
- // -- at char
- *c = Dictionary::getChar(dict, &position);
- // -- at flag/add
- *terminal = Dictionary::getTerminal(dict, &position);
- *childrenPosition = Dictionary::getAddress(dict, &position);
- // -- after address or flag
- *freq = (*terminal) ? Dictionary::getFreq(dict, isLatestDictVersion, &position) : 1;
- // returns next sibling's position
- return position;
-}
-
-
-inline unsigned short Dictionary::toBaseLowerCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (c >='A' && c <= 'Z') {
- c |= 32;
- } else if (c > 127) {
- c = latin_tolower(c);
- }
- return c;
-}
-
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index 763a3a174..b91957c77 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -47,12 +47,12 @@ ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboard
HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
&& keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
&& sweetSpotCenterYs && sweetSpotRadii),
- mInputXCoordinates(NULL), mInputYCoordinates(NULL),
+ mInputXCoordinates(0), mInputYCoordinates(0),
mTouchPositionCorrectionEnabled(false) {
const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
mProximityCharsArray = new uint32_t[proximityGridLength];
if (DEBUG_PROXIMITY_INFO) {
- LOGI("Create proximity info array %d", proximityGridLength);
+ AKLOGI("Create proximity info array %d", proximityGridLength);
}
memcpy(mProximityCharsArray, proximityCharsArray,
proximityGridLength * sizeof(mProximityCharsArray[0]));
@@ -100,13 +100,21 @@ inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y)
}
bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
+ if (x < 0 || y < 0) {
+ if (DEBUG_DICT) {
+ AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
+ assert(false);
+ }
+ return false;
+ }
+
const int startIndex = getStartIndexFromCoordinates(x, y);
if (DEBUG_PROXIMITY_INFO) {
- LOGI("hasSpaceProximity: index %d", startIndex);
+ AKLOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
}
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
- LOGI("Index: %d", mProximityCharsArray[startIndex + i]);
+ AKLOGI("Index: %d", mProximityCharsArray[startIndex + i]);
}
if (mProximityCharsArray[startIndex + i] == KEYCODE_SPACE) {
return true;
@@ -167,7 +175,7 @@ int ProximityInfo::getKeyIndex(const int c) const {
// We do not have the coordinate data
return NOT_A_INDEX;
}
- const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+ const unsigned short baseLowerC = toBaseLowerCase(c);
if (baseLowerC > MAX_CHAR_CODE) {
return NOT_A_INDEX;
}
@@ -232,7 +240,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde
const unsigned short c, const bool checkProximityChars, int *proximityIndex) const {
const int *currentChars = getProximityCharsAt(index);
const int firstChar = currentChars[0];
- const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+ const unsigned short baseLowerC = toBaseLowerCase(c);
// The first char in the array is what user typed. If it matches right away,
// that means the user typed that same char for this pos.
@@ -245,7 +253,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde
// If the non-accented, lowercased version of that first character matches c,
// then we have a non-accented version of the accented character the user
// typed. Treat it as a close char.
- if (Dictionary::toBaseLowerCase(firstChar) == baseLowerC)
+ if (toBaseLowerCase(firstChar) == baseLowerC)
return NEAR_PROXIMITY_CHAR;
// Not an exact nor an accent-alike match: search the list of close keys
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 35e354c6e..9ca5505a7 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -26,7 +26,7 @@ namespace latinime {
class Correction;
class ProximityInfo {
-public:
+ public:
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
@@ -56,7 +56,7 @@ public:
bool existsCharInProximityAt(const int index, const int c) const;
bool existsAdjacentProximityChars(const int index) const;
ProximityType getMatchedProximityId(const int index, const unsigned short c,
- const bool checkProximityChars, int *proximityIndex = NULL) const;
+ const bool checkProximityChars, int *proximityIndex = 0) const;
int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
}
@@ -68,7 +68,7 @@ public:
return mTouchPositionCorrectionEnabled;
}
-private:
+ private:
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex
diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h
new file mode 100644
index 000000000..1f9815936
--- /dev/null
+++ b/native/src/terminal_attributes.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
+#define LATINIME_TERMINAL_ATTRIBUTES_H
+
+#include "unigram_dictionary.h"
+
+namespace latinime {
+
+/**
+ * This class encapsulates information about a terminal that allows to
+ * retrieve local node attributes like the list of shortcuts without
+ * exposing the format structure to the client.
+ */
+class TerminalAttributes {
+ public:
+ class ShortcutIterator {
+ const uint8_t* const mDict;
+ bool mHasNextShortcutTarget;
+ int mPos;
+
+ public:
+ ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict),
+ mPos(pos) {
+ mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
+ }
+
+ inline bool hasNextShortcutTarget() const {
+ return mHasNextShortcutTarget;
+ }
+
+ // Gets the shortcut target itself as a uint16_t string. For parameters and return value
+ // see BinaryFormat::getWordAtAddress.
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
+ const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
+ mHasNextShortcutTarget =
+ 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
+ int shortcutAddress =
+ BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos);
+ return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord);
+ }
+ };
+
+ private:
+ const uint8_t* const mDict;
+ const uint8_t mFlags;
+ const int mStartPos;
+
+ public:
+ TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) :
+ mDict(dict), mFlags(flags), mStartPos(pos) {
+ }
+
+ inline bool isShortcutOnly() const {
+ return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY);
+ }
+
+ inline ShortcutIterator getShortcutIterator() const {
+ return ShortcutIterator(mDict, mStartPos, mFlags);
+ }
+};
+} // namespace latinime
+
+#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8eb5a9700..e998ee486 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -25,6 +25,7 @@
#include "unigram_dictionary.h"
#include "binary_format.h"
+#include "terminal_attributes.h"
namespace latinime {
@@ -46,21 +47,25 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
if (DEBUG_DICT) {
- LOGI("UnigramDictionary - constructor");
+ AKLOGI("UnigramDictionary - constructor");
}
- mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
}
UnigramDictionary::~UnigramDictionary() {
- delete mCorrection;
}
-static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
+static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize,
const int MAX_PROXIMITY_CHARS) {
return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize;
}
-bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {
+// TODO: This needs to take an const unsigned short* and not tinker with its contents
+static inline void addWord(
+ unsigned short *word, int length, int frequency, WordsPriorityQueue *queue) {
+ queue->push(frequency, word, length);
+}
+
+bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codesSize) const {
// There can't be a digraph if we don't have at least 2 characters to examine
if (i + 2 > codesSize) return false;
@@ -86,9 +91,10 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
// codesRemain is the remaining size in codesSrc.
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
- const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
+ const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
+ const int codesBufferSize, const int flags, const int *codesSrc,
+ const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) {
@@ -105,8 +111,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, codesBufferSize, flags,
codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
- currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,
- frequencies);
+ currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction,
+ queuePool);
// Copy the second char of the digraph in place, then continue processing on
// the remaining part of the word.
@@ -114,9 +120,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,
BYTES_IN_ONE_CHAR);
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
- codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,
- codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,
- outWords, frequencies);
+ codesBuffer, codesBufferSize, flags,
+ codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1,
+ codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool);
return;
}
}
@@ -132,40 +138,39 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest, codesSrc, remainingBytes);
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies,
- flags);
+ (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction,
+ queuePool);
}
-int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
+ WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies) {
+ Correction* masterCorrection = correction;
if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)
{ // Incrementally tune the word and try all possibilities
int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);
+ codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool);
} else { // Normal processing
- getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
- outWords, frequencies, flags);
+ getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags,
+ masterCorrection, queuePool);
}
PROF_START(20);
- // Get the word count
- int suggestedWordsCount = 0;
- while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
- suggestedWordsCount++;
- }
+ const int suggestedWordsCount =
+ queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords);
if (DEBUG_DICT) {
- LOGI("Returning %d words", suggestedWordsCount);
+ AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
#ifdef FLAG_DBG
- short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
+ short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
- LOGI("%s %i", s, mFrequencies[j]);
+ AKLOGI("%s %i", s, frequencies[j]);
#endif
}
}
@@ -175,23 +180,19 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
}
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies, const int flags) {
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const int inputLength, const int flags, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
PROF_OPEN;
PROF_START(0);
- initSuggestions(
- proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
- if (DEBUG_DICT) assert(codesSize == mInputLength);
-
- const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
- mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
+ queuePool->clearAll();
PROF_END(0);
- const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
- // TODO: remove
PROF_START(1);
- getSuggestionCandidates(useFullEditDistance);
+ const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
+ getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance,
+ inputLength, correction, queuePool);
PROF_END(1);
PROF_START(2);
@@ -209,12 +210,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(5);
// Suggestions with missing space
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
- && mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
- for (int i = 1; i < codesSize; ++i) {
+ && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
+ for (int i = 1; i < inputLength; ++i) {
if (DEBUG_DICT) {
- LOGI("--- Suggest missing space characters %d", i);
+ AKLOGI("--- Suggest missing space characters %d", i);
}
- getMissingSpaceWords(mInputLength, i, mCorrection, useFullEditDistance);
+ getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, i, correction, queuePool);
}
}
PROF_END(5);
@@ -222,172 +224,297 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(6);
if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
// The first and last "mistyped spaces" are taken care of by excessive character handling
- for (int i = 1; i < codesSize - 1; ++i) {
+ for (int i = 1; i < inputLength - 1; ++i) {
if (DEBUG_DICT) {
- LOGI("--- Suggest words with proximity space %d", i);
+ AKLOGI("--- Suggest words with proximity space %d", i);
}
const int x = xcoordinates[i];
const int y = ycoordinates[i];
if (DEBUG_PROXIMITY_INFO) {
- LOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
+ AKLOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
i, x, y, proximityInfo->hasSpaceProximity(x, y));
}
if (proximityInfo->hasSpaceProximity(x, y)) {
- getMistypedSpaceWords(mInputLength, i, mCorrection, useFullEditDistance);
+ getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, i, correction, queuePool);
}
}
}
PROF_END(6);
-}
-
-void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies) {
if (DEBUG_DICT) {
- LOGI("initSuggest");
- }
- mFrequencies = frequencies;
- mOutputChars = outWords;
- mInputLength = codesSize;
- proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates);
- mProximityInfo = proximityInfo;
-}
-
-static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
- if (c < nextLettersSize) {
- nextLetters[c]++;
- }
-}
-
-// TODO: We need to optimize addWord by using STL or something
-// TODO: This needs to take an const unsigned short* and not tinker with its contents
-bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
- word[length] = 0;
- if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Found word = %s, freq = %d", s, frequency);
-#endif
- }
- if (length > MAX_WORD_LENGTH) {
- if (DEBUG_DICT) {
- LOGI("Exceeded max word length.");
+ queuePool->dumpSubQueue1TopSuggestions();
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ WordsPriorityQueue* queue = queuePool->getSubQueue1(i);
+ if (queue->size() > 0) {
+ WordsPriorityQueue::SuggestedWord* sw = queue->top();
+ const int score = sw->mScore;
+ const unsigned short* word = sw->mWord;
+ const int wordLength = sw->mWordLength;
+ double ns = Correction::RankingAlgorithm::calcNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), i, word, wordLength, score);
+ ns += 0;
+ AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
+ (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
+ DUMP_WORD(proximityInfo->getPrimaryInputWord(), i);
+ DUMP_WORD(word, wordLength);
+ }
}
- return false;
}
+}
- // Find the right insertion point
- int insertAt = 0;
- while (insertAt < MAX_WORDS) {
- // TODO: How should we sort words with the same frequency?
- if (frequency > mFrequencies[insertAt]) {
- break;
- }
- insertAt++;
- }
- if (insertAt < MAX_WORDS) {
- if (DEBUG_DICT) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
-#endif
- }
- memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
- (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
- (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
- mFrequencies[insertAt] = frequency;
- memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
- (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
- (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
- unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
- while (length--) {
- *dest++ = *word++;
- }
- *dest = 0; // NULL terminate
- if (DEBUG_DICT) {
- LOGI("Added word at %d", insertAt);
- }
- return true;
+void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
+ const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
+ if (DEBUG_DICT) {
+ AKLOGI("initSuggest");
}
- return false;
+ proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
+ const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
+ correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
static const char QUOTE = '\'';
static const char SPACE = ' ';
-void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance) {
+void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+ getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
+ true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
+}
+
+void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
+ const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
+ const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams
- mCorrection->setCorrectionParams(0, 0, 0,
- -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance);
+ correction->setCorrectionParams(0, 0, 0,
+ -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
+ doAutoCompletion, maxErrors);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
- int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
+ int childCount = BinaryFormat::getGroupCountAndForwardPointer(DICT_ROOT, &rootPosition);
int outputIndex = 0;
- mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0));
+ correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0));
// Depth first search
while (outputIndex >= 0) {
- if (mCorrection->initProcessState(outputIndex)) {
- int siblingPos = mCorrection->getTreeSiblingPos(outputIndex);
+ if (correction->initProcessState(outputIndex)) {
+ int siblingPos = correction->getTreeSiblingPos(outputIndex);
int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- mCorrection, &childCount, &firstChildPos, &siblingPos);
+ correction, &childCount, &firstChildPos, &siblingPos, queuePool);
// Update next sibling pos
- mCorrection->setTreeSiblingPos(outputIndex, siblingPos);
+ correction->setTreeSiblingPos(outputIndex, siblingPos);
if (needsToTraverseChildrenNodes) {
// Goes to child node
- outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos);
+ outputIndex = correction->goDownTree(outputIndex, childCount, firstChildPos);
}
} else {
// Goes to parent sibling node
- outputIndex = mCorrection->getTreeParentIndex(outputIndex);
+ outputIndex = correction->getTreeParentIndex(outputIndex);
}
}
}
-void UnigramDictionary::getMissingSpaceWords(
+void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction,
- const bool useFullEditDistance) {
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
- useFullEditDistance);
- getSplitTwoWordsSuggestion(inputLength, correction);
+ WordsPriorityQueuePool* queuePool) {
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
+ correction, queuePool);
}
-void UnigramDictionary::getMistypedSpaceWords(
+void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction,
- const bool useFullEditDistance) {
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
- useFullEditDistance);
- getSplitTwoWordsSuggestion(inputLength, correction);
+ WordsPriorityQueuePool* queuePool) {
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
+ correction, queuePool);
}
-inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
- const int inputIndex, const int skipPos, const int depth) {
- const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(inputIndex);
- // Skip the ' or other letter and continue deeper
- return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
-}
+inline void UnigramDictionary::onTerminal(const int freq,
+ const TerminalAttributes& terminalAttributes, Correction *correction,
+ WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
+ const int inputIndex = correction->getInputIndex();
+ const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
-inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
int wordLength;
unsigned short* wordPointer;
- const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
- if (finalFreq >= 0) {
- addWord(wordPointer, wordLength, finalFreq);
+
+ if (addToMasterQueue) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
+ if (finalFreq != NOT_A_FREQUENCY) {
+ if (!terminalAttributes.isShortcutOnly()) {
+ addWord(wordPointer, wordLength, finalFreq, masterQueue);
+ }
+
+ // Please note that the shortcut candidates will be added to the master queue only.
+ TerminalAttributes::ShortcutIterator iterator =
+ terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {
+ // TODO: addWord only supports weak ordering, meaning we have no means
+ // to control the order of the shortcuts relative to one another or to the word.
+ // We need to either modulate the frequency of each shortcut according
+ // to its own shortcut frequency or to make the queue
+ // so that the insert order is protected inside the queue for words
+ // with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
+ }
+ }
+ }
+
+ // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
+ // or more length.
+ if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
+ // TODO: Check the validity of "inputIndex == wordLength"
+ //if (addToSubQueue && inputIndex == wordLength) {
+ WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
+ const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength,
+ inputIndex);
+ addWord(wordPointer, wordLength, finalFreq, subQueue);
+ }
+}
+
+void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
+ if (inputLength >= MAX_WORD_LENGTH) return;
+ if (DEBUG_DICT) {
+ int inputCount = 0;
+ if (spaceProximityPos >= 0) ++inputCount;
+ if (missingSpacePos >= 0) ++inputCount;
+ assert(inputCount <= 1);
+ }
+
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+
+ const bool isSpaceProximity = spaceProximityPos >= 0;
+
+ // First word
+ const int firstInputWordStartPos = 0;
+ const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+ int firstFreq = getMostFrequentWordLike(
+ firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord);
+ unsigned short* firstOutputWord = 0;
+ int firstOutputWordLength = 0;
+ if (firstFreq > 0) {
+ firstOutputWordLength = firstInputWordLength;
+ firstOutputWord = mWord;
+ } else {
+ if (masterQueue->size() > 0) {
+ double nsForMaster = masterQueue->getHighestNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0);
+ if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) {
+ // Do nothing if the highest suggestion exceeds the threshold.
+ return;
+ }
+ }
+ WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength);
+ if (firstWordQueue->size() < 1) {
+ return;
+ }
+ int score = 0;
+ const double ns = firstWordQueue->getHighestNormalizedScore(
+ proximityInfo->getPrimaryInputWord(), firstInputWordLength,
+ &firstOutputWord, &score, &firstOutputWordLength);
+ // Two words correction won't be done if the score of the first word doesn't exceed the
+ // threshold.
+ if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) {
+ return;
+ }
+ firstFreq = score >> (firstOutputWordLength
+ + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
+ }
+
+ if (DEBUG_DICT) {
+ AKLOGI("First freq: %d", firstFreq);
+ }
+
+ if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) {
+ return;
}
+
+ // Allocating fixed length array on stack
+ unsigned short outputWord[MAX_WORD_LENGTH];
+ int outputWordLength = 0;
+
+ for (int i = 0; i < firstOutputWordLength; ++i) {
+ outputWord[i] = firstOutputWord[i];
+ }
+
+ outputWord[firstOutputWordLength] = SPACE;
+ outputWordLength = firstOutputWordLength + 1;
+
+ //const int outputWordLength = firstOutputWordLength + secondWordLength + 1;
+ // Space proximity preparation
+ //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
+ //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstOutputWordLength,
+ //subQueue, correction);
+ //getSuggestionCandidates(useFullEditDistance, firstOutputWordLength, correction, subQueue,
+ //false, MAX_ERRORS_FOR_TWO_WORDS);
+
+ // Second word
+ const int secondInputWordLength = isSpaceProximity
+ ? (inputLength - spaceProximityPos - 1)
+ : (inputLength - missingSpacePos);
+ const int secondInputWordStartPos =
+ isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
+ int secondFreq = getMostFrequentWordLike(
+ secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord);
+ unsigned short* secondOutputWord = 0;
+ int secondOutputWordLength = 0;
+
+ if (secondFreq > 0) {
+ secondOutputWordLength = secondInputWordLength;
+ secondOutputWord = mWord;
+ }
+
+ if (DEBUG_DICT) {
+ AKLOGI("Second freq: %d", secondFreq);
+ }
+
+ if (secondFreq <= 0 || secondOutputWordLength <= 0
+ || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) {
+ return;
+ }
+
+ for (int i = 0; i < secondOutputWordLength; ++i) {
+ outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i];
+ }
+
+ outputWordLength += secondOutputWordLength;
+
+ // TODO: Remove initSuggestions and correction->setCorrectionParams
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+
+ correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+ -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
+ useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
+ if (DEBUG_DICT) {
+ AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ }
+ addWord(outputWord, outputWordLength, pairFreq, masterQueue);
+ return;
}
-void UnigramDictionary::getSplitTwoWordsSuggestion(
- const int inputLength, Correction* correction) {
- const int spaceProximityPos = correction->getSpaceProximityPos();
- const int missingSpacePos = correction->getMissingSpacePos();
+void UnigramDictionary::getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+
if (DEBUG_DICT) {
int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount;
@@ -408,11 +535,21 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
return;
const int newWordLength = firstWordLength + secondWordLength + 1;
+
+
+ // Space proximity preparation
+ //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
+ //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
+ //correction);
+ //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
+ //MAX_ERRORS_FOR_TWO_WORDS);
+
// Allocating variable length array on stack
unsigned short word[newWordLength];
- const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);
+ const int firstFreq = getMostFrequentWordLike(
+ firstWordStartPos, firstWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
- LOGI("First freq: %d", firstFreq);
+ AKLOGI("First freq: %d", firstFreq);
}
if (firstFreq <= 0) return;
@@ -420,9 +557,10 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i];
}
- const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);
+ const int secondFreq = getMostFrequentWordLike(
+ secondWordStartPos, secondWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
- LOGI("Second freq: %d", secondFreq);
+ AKLOGI("Second freq: %d", secondFreq);
}
if (secondFreq <= 0) return;
@@ -431,22 +569,28 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i - firstWordLength - 1];
}
- const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+ // TODO: Remove initSuggestions and correction->setCorrectionParams
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+
+ correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+ -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
+ useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
if (DEBUG_DICT) {
- LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
+ AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
}
- addWord(word, newWordLength, pairFreq);
+ addWord(word, newWordLength, pairFreq, masterQueue);
return;
}
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
- const int inputLength, unsigned short *word) {
+ const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) {
uint16_t inWord[inputLength];
for (int i = 0; i < inputLength; ++i) {
- inWord[i] = (uint16_t)mProximityInfo->getPrimaryCharAt(startInputIndex + i);
+ inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i);
}
return getMostFrequentWordLikeInner(inWord, inputLength, word);
}
@@ -470,8 +614,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
- int32_t baseChar = Dictionary::toBaseLowerCase(character);
- const uint16_t wChar = Dictionary::toBaseLowerCase(inWord[startInputIndex]);
+ int32_t baseChar = toBaseLowerCase(character);
+ const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -483,8 +627,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
if (hasMultipleChars) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) {
- baseChar = Dictionary::toBaseLowerCase(character);
- if (Dictionary::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+ baseChar = toBaseLowerCase(character);
+ if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex;
return false;
@@ -521,9 +665,10 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
int maxFreq = -1;
const uint8_t* const root = DICT_ROOT;
- mStackChildCount[0] = root[0];
+ int startPos = 0;
+ mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
mStackInputIndex[0] = 0;
- mStackSiblingPos[0] = 1;
+ mStackSiblingPos[0] = startPos;
while (depth >= 0) {
const int charGroupCount = mStackChildCount[depth];
int pos = mStackSiblingPos[depth];
@@ -597,7 +742,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount,
- int *newChildrenPosition, int *nextSiblingPosition) {
+ int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -648,7 +793,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true;
- } else if (stateType == Correction::UNRELATED) {
+ } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
// We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining
@@ -672,12 +817,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
} while (NOT_A_CHARACTER != c);
if (isTerminalNode) {
- if (needsToInvokeOnTerminal) {
- // The frequency should be here, because we come here only if this is actually
- // a terminal node, and we are on its last char.
- const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(freq, mCorrection);
- }
+ // The frequency should be here, because we come here only if this is actually
+ // a terminal node, and we are on its last char.
+ const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+ const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
+ const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
+ onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);
// If there are more chars in this node, then this virtual node has children.
// If we are on the last char, this virtual node has children if this node has.
@@ -702,7 +848,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
*nextSiblingPosition =
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
if (DEBUG_DICT_FULL) {
- LOGI("Traversing was pruned.");
+ AKLOGI("Traversing was pruned.");
}
return false;
}
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index ef9709a89..b950971bb 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -22,17 +22,14 @@
#include "correction_state.h"
#include "defines.h"
#include "proximity_info.h"
-
-#ifndef NULL
-#define NULL 0
-#endif
+#include "words_priority_queue.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
+class TerminalAttributes;
class UnigramDictionary {
-
-public:
-
+ public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
@@ -46,8 +43,14 @@ public:
// Flag for terminal groups
static const int FLAG_IS_TERMINAL = 0x10;
+ // Flag for shortcut targets presence
+ static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
// Flag for bigram presence
static const int FLAG_HAS_BIGRAMS = 0x04;
+ // Flag for shortcut-only words. Some words are shortcut-only, which means they match when
+ // the user types them but they don't pop in the suggestion strip, only the words they are
+ // shortcuts for do.
+ static const int FLAG_IS_SHORTCUT_ONLY = 0x02;
// Attribute (bigram/shortcut) related flags:
// Flag for presence of more attributes
@@ -64,47 +67,66 @@ public:
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+ // Error tolerances
+ static const int DEFAULT_MAX_ERRORS = 2;
+ static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
+
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
const bool isLatestDictVersion);
bool isValidWord(const uint16_t* const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
- int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
+ Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies);
virtual ~UnigramDictionary();
-private:
-
+ private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies, const int flags);
- bool isDigraph(const int* codes, const int i, const int codesSize) const;
+ const int *ycoordinates, const int *codes, const int inputLength,
+ const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
+ bool isDigraph(const int *codes, const int i, const int codesSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
- const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
+ const int codesBufferSize, const int flags, const int* codesSrc,
+ const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies);
- void getSuggestionCandidates(const bool useFullEditDistance);
- bool addWord(unsigned short *word, int length, int frequency);
- void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
- void getMissingSpaceWords(const int inputLength, const int missingSpacePos,
- Correction *correction, const bool useFullEditDistance);
- void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos,
- Correction *correction, const bool useFullEditDistance);
- void onTerminal(const int freq, Correction *correction);
+ const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
+ void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getSuggestionCandidates(
+ const bool useFullEditDistance, const int inputLength, Correction *correction,
+ WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
+ void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
+ const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getSplitTwoWordsSuggestionsOld(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
+ const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, const int missingSpacePos, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
+ void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, const int spaceProximityPos, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
+ void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
+ Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
- bool processCurrentNode(const int initialPos,
- Correction *correction, int *newCount,
- int *newChildPosition, int *nextSiblingPosition);
+ bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
+ int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
- unsigned short *word);
+ ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
- short unsigned int* outWord);
+ short unsigned int *outWord);
const uint8_t* const DICT_ROOT;
const int MAX_WORD_LENGTH;
@@ -127,14 +149,8 @@ private:
};
static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[];
- int *mFrequencies;
- unsigned short *mOutputChars;
- ProximityInfo *mProximityInfo;
- Correction *mCorrection;
- int mInputLength;
- // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
-
+ // Still bundled members
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
new file mode 100644
index 000000000..c85f2b9b3
--- /dev/null
+++ b/native/src/words_priority_queue.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_H
+
+#include <iostream>
+#include <queue>
+#include "defines.h"
+
+namespace latinime {
+
+class WordsPriorityQueue {
+ public:
+ class SuggestedWord {
+ public:
+ int mScore;
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mWordLength;
+ bool mUsed;
+
+ void setParams(int score, unsigned short* word, int wordLength) {
+ mScore = score;
+ mWordLength = wordLength;
+ memcpy(mWord, word, sizeof(unsigned short) * wordLength);
+ mUsed = true;
+ }
+ };
+
+ WordsPriorityQueue(int maxWords, int maxWordLength) :
+ MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH(
+ (unsigned int) maxWordLength) {
+ mSuggestedWords = new SuggestedWord[maxWordLength];
+ for (int i = 0; i < maxWordLength; ++i) {
+ mSuggestedWords[i].mUsed = false;
+ }
+ mHighestSuggestedWord = 0;
+ }
+
+ ~WordsPriorityQueue() {
+ delete[] mSuggestedWords;
+ }
+
+ void push(int score, unsigned short* word, int wordLength) {
+ SuggestedWord* sw = 0;
+ if (mSuggestions.size() >= MAX_WORDS) {
+ sw = mSuggestions.top();
+ const int minScore = sw->mScore;
+ if (minScore >= score) {
+ return;
+ } else {
+ sw->mUsed = false;
+ mSuggestions.pop();
+ }
+ }
+ if (sw == 0) {
+ sw = getFreeSuggestedWord(score, word, wordLength);
+ } else {
+ sw->setParams(score, word, wordLength);
+ }
+ if (sw == 0) {
+ AKLOGE("SuggestedWord is accidentally null.");
+ return;
+ }
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ AKLOGI("Push word. %d, %d", score, wordLength);
+ DUMP_WORD(word, wordLength);
+ }
+ mSuggestions.push(sw);
+ if (!mHighestSuggestedWord || mHighestSuggestedWord->mScore < sw->mScore) {
+ mHighestSuggestedWord = sw;
+ }
+ }
+
+ SuggestedWord* top() {
+ if (mSuggestions.empty()) return 0;
+ SuggestedWord* sw = mSuggestions.top();
+ return sw;
+ }
+
+ int outputSuggestions(int *frequencies, unsigned short *outputChars) {
+ mHighestSuggestedWord = 0;
+ const unsigned int size = min(MAX_WORDS, mSuggestions.size());
+ int index = size - 1;
+ while (!mSuggestions.empty() && index >= 0) {
+ SuggestedWord* sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ AKLOGI("dump word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ const unsigned int wordLength = sw->mWordLength;
+ char* targetAdr = (char*) outputChars
+ + (index) * MAX_WORD_LENGTH * sizeof(short);
+ frequencies[index] = sw->mScore;
+ memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short));
+ if (wordLength < MAX_WORD_LENGTH) {
+ ((unsigned short*) targetAdr)[wordLength] = 0;
+ }
+ sw->mUsed = false;
+ mSuggestions.pop();
+ --index;
+ }
+ return size;
+ }
+
+ int size() const {
+ return mSuggestions.size();
+ }
+
+ void clear() {
+ mHighestSuggestedWord = 0;
+ while (!mSuggestions.empty()) {
+ SuggestedWord* sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ AKLOGI("Clear word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ sw->mUsed = false;
+ mSuggestions.pop();
+ }
+ }
+
+ void dumpTopWord() {
+ if (size() <= 0) {
+ return;
+ }
+ DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
+ }
+
+ double getHighestNormalizedScore(const unsigned short* before, const int beforeLength,
+ unsigned short** outWord, int *outScore, int *outLength) {
+ if (!mHighestSuggestedWord) {
+ return 0.0;
+ }
+ SuggestedWord* sw = mHighestSuggestedWord;
+ const int score = sw->mScore;
+ unsigned short* word = sw->mWord;
+ const int wordLength = sw->mWordLength;
+ if (outScore) {
+ *outScore = score;
+ }
+ if (outWord) {
+ *outWord = word;
+ }
+ if (outLength) {
+ *outLength = wordLength;
+ }
+ return Correction::RankingAlgorithm::calcNormalizedScore(
+ before, beforeLength, word, wordLength, score);
+ }
+
+ private:
+ struct wordComparator {
+ bool operator ()(SuggestedWord * left, SuggestedWord * right) {
+ return left->mScore > right->mScore;
+ }
+ };
+
+ SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
+ int wordLength) {
+ for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
+ if (!mSuggestedWords[i].mUsed) {
+ mSuggestedWords[i].setParams(score, word, wordLength);
+ return &mSuggestedWords[i];
+ }
+ }
+ return 0;
+ }
+
+ typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
+ wordComparator> Suggestions;
+ Suggestions mSuggestions;
+ const unsigned int MAX_WORDS;
+ const unsigned int MAX_WORD_LENGTH;
+ SuggestedWord* mSuggestedWords;
+ SuggestedWord* mHighestSuggestedWord;
+};
+}
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_H
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
new file mode 100644
index 000000000..5fa254852
--- /dev/null
+++ b/native/src/words_priority_queue_pool.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+
+#include <assert.h>
+#include <new>
+#include "words_priority_queue.h"
+
+namespace latinime {
+
+class WordsPriorityQueuePool {
+ public:
+ WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
+ mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
+ for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT;
+ ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
+ mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset)
+ WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset)
+ WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ }
+ }
+
+ virtual ~WordsPriorityQueuePool() {
+ }
+
+ WordsPriorityQueue* getMasterQueue() {
+ return mMasterQueue;
+ }
+
+ // TODO: Come up with more generic pool
+ WordsPriorityQueue* getSubQueue1(const int id) {
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT);
+ }
+ return mSubQueues1[id];
+ }
+
+ WordsPriorityQueue* getSubQueue2(const int id) {
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ assert(id >= 0 && id < SUB_QUEUE_MAX_COUNT);
+ }
+ return mSubQueues2[id];
+ }
+
+ inline void clearAll() {
+ mMasterQueue->clear();
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ mSubQueues1[i]->clear();
+ mSubQueues2[i]->clear();
+ }
+ }
+
+ void dumpSubQueue1TopSuggestions() {
+ AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
+ for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+ mSubQueues1[i]->dumpTopWord();
+ }
+ }
+
+ private:
+ WordsPriorityQueue* mMasterQueue;
+ WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];
+ WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT];
+ char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
+ char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+ char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+};
+}
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H