aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/Android.mk61
-rw-r--r--native/jni/Android.mk87
-rw-r--r--native/jni/Application.mk1
-rw-r--r--native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp12
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp86
-rw-r--r--native/jni/jni_common.cpp6
-rw-r--r--native/jni/jni_common.h8
-rw-r--r--native/src/basechars.cpp (renamed from native/src/basechars.h)12
-rw-r--r--native/src/bigram_dictionary.h4
-rw-r--r--native/src/binary_format.h18
-rw-r--r--native/src/char_utils.h39
-rw-r--r--native/src/correction.cpp211
-rw-r--r--native/src/correction.h35
-rw-r--r--native/src/defines.h10
-rw-r--r--native/src/dictionary.cpp5
-rw-r--r--native/src/dictionary.h27
-rw-r--r--native/src/proximity_info.cpp18
-rw-r--r--native/src/proximity_info.h6
-rw-r--r--native/src/terminal_attributes.h78
-rw-r--r--native/src/unigram_dictionary.cpp300
-rw-r--r--native/src/unigram_dictionary.h87
-rw-r--r--native/src/words_priority_queue.h157
-rw-r--r--native/src/words_priority_queue_pool.h54
23 files changed, 914 insertions, 408 deletions
diff --git a/native/Android.mk b/native/Android.mk
index f07be6abe..5053e7d64 100644
--- a/native/Android.mk
+++ b/native/Android.mk
@@ -1,60 +1 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
-
-LOCAL_CFLAGS += -Werror -Wall
-
-# To suppress compiler warnings for unused variables/functions used for debug features etc.
-LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
-
-LOCAL_SRC_FILES := \
- jni/com_android_inputmethod_keyboard_ProximityInfo.cpp \
- jni/com_android_inputmethod_latin_BinaryDictionary.cpp \
- jni/jni_common.cpp \
- src/bigram_dictionary.cpp \
- src/char_utils.cpp \
- src/correction.cpp \
- src/dictionary.cpp \
- src/proximity_info.cpp \
- src/unigram_dictionary.cpp
-
-#FLAG_DBG := true
-#FLAG_DO_PROFILE := true
-
-TARGETING_UNBUNDLED_FROYO := true
-
-ifeq ($(TARGET_ARCH), x86)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(FLAG_DBG), true)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(FLAG_DO_PROFILE), true)
- TARGETING_UNBUNDLED_FROYO := false
-endif
-
-ifeq ($(TARGETING_UNBUNDLED_FROYO), true)
- LOCAL_NDK_VERSION := 4
- LOCAL_SDK_VERSION := 8
-endif
-
-LOCAL_MODULE := libjni_latinime
-
-LOCAL_MODULE_TAGS := user
-
-ifeq ($(FLAG_DO_PROFILE), true)
- $(warning Making profiling version of native library)
- LOCAL_CFLAGS += -DFLAG_DO_PROFILE
- LOCAL_SHARED_LIBRARIES := libcutils libutils
-else # FLAG_DO_PROFILE
-ifeq ($(FLAG_DBG), true)
- $(warning Making debug version of native library)
- LOCAL_CFLAGS += -DFLAG_DBG
- LOCAL_SHARED_LIBRARIES := libcutils libutils
-endif # FLAG_DBG
-endif # FLAG_DO_PROFILE
-
-include $(BUILD_SHARED_LIBRARY)
+include $(call all-subdir-makefiles)
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
new file mode 100644
index 000000000..c4adbfab4
--- /dev/null
+++ b/native/jni/Android.mk
@@ -0,0 +1,87 @@
+# Copyright (C) 2011 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LATIN_IME_SRC_DIR := ../src
+
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
+
+LOCAL_CFLAGS += -Werror -Wall
+
+# To suppress compiler warnings for unused variables/functions used for debug features etc.
+LOCAL_CFLAGS += -Wno-unused-parameter -Wno-unused-function
+
+LATIN_IME_JNI_SRC_FILES := \
+ com_android_inputmethod_keyboard_ProximityInfo.cpp \
+ com_android_inputmethod_latin_BinaryDictionary.cpp \
+ jni_common.cpp
+
+LATIN_IME_CORE_SRC_FILES := \
+ basechars.cpp \
+ bigram_dictionary.cpp \
+ char_utils.cpp \
+ correction.cpp \
+ dictionary.cpp \
+ proximity_info.cpp \
+ unigram_dictionary.cpp
+
+LOCAL_SRC_FILES := \
+ $(LATIN_IME_JNI_SRC_FILES) \
+ $(addprefix $(LATIN_IME_SRC_DIR)/,$(LATIN_IME_CORE_SRC_FILES))
+
+#FLAG_DBG := true
+#FLAG_DO_PROFILE := true
+
+TARGETING_UNBUNDLED_FROYO := true
+
+ifeq ($(TARGET_ARCH), x86)
+ TARGETING_UNBUNDLED_FROYO := false
+endif
+
+ifeq ($(FLAG_DBG), true)
+ TARGETING_UNBUNDLED_FROYO := false
+endif
+
+ifeq ($(FLAG_DO_PROFILE), true)
+ TARGETING_UNBUNDLED_FROYO := false
+endif
+
+ifeq ($(TARGETING_UNBUNDLED_FROYO), true)
+ LOCAL_NDK_VERSION := 4
+ LOCAL_SDK_VERSION := 8
+endif
+
+LOCAL_MODULE := libjni_latinime
+
+LOCAL_MODULE_TAGS := user
+
+# For STL
+LOCAL_C_INCLUDES += external/stlport/stlport bionic
+LOCAL_SHARED_LIBRARIES += libstlport
+
+ifeq ($(FLAG_DO_PROFILE), true)
+ $(warning Making profiling version of native library)
+ LOCAL_CFLAGS += -DFLAG_DO_PROFILE
+ LOCAL_SHARED_LIBRARIES += libcutils libutils
+else # FLAG_DO_PROFILE
+ifeq ($(FLAG_DBG), true)
+ $(warning Making debug version of native library)
+ LOCAL_CFLAGS += -DFLAG_DBG
+ LOCAL_SHARED_LIBRARIES += libcutils libutils
+endif # FLAG_DBG
+endif # FLAG_DO_PROFILE
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/native/jni/Application.mk b/native/jni/Application.mk
new file mode 100644
index 000000000..caf3b2622
--- /dev/null
+++ b/native/jni/Application.mk
@@ -0,0 +1 @@
+APP_STL := stlport_static
diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
index 595ea2fdc..6e4fefd72 100644
--- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
+++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
@@ -28,14 +28,14 @@
namespace latinime {
-static jint latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
+static jlong latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
jint maxProximityCharsSize, jint displayWidth, jint displayHeight, jint gridWidth,
jint gridHeight, jintArray proximityCharsArray, jint keyCount,
jintArray keyXCoordinateArray, jintArray keyYCoordinateArray, jintArray keyWidthArray,
jintArray keyHeightArray, jintArray keyCharCodeArray,
jfloatArray sweetSpotCenterXArray, jfloatArray sweetSpotCenterYArray,
jfloatArray sweetSpotRadiusArray) {
- jint *proximityChars = env->GetIntArrayElements(proximityCharsArray, NULL);
+ jint *proximityChars = env->GetIntArrayElements(proximityCharsArray, 0);
jint *keyXCoordinates = safeGetIntArrayElements(env, keyXCoordinateArray);
jint *keyYCoordinates = safeGetIntArrayElements(env, keyYCoordinateArray);
jint *keyWidths = safeGetIntArrayElements(env, keyWidthArray);
@@ -59,19 +59,19 @@ static jint latinime_Keyboard_setProximityInfo(JNIEnv *env, jobject object,
safeReleaseIntArrayElements(env, keyYCoordinateArray, keyYCoordinates);
safeReleaseIntArrayElements(env, keyXCoordinateArray, keyXCoordinates);
env->ReleaseIntArrayElements(proximityCharsArray, proximityChars, 0);
- return (jint)proximityInfo;
+ return (jlong)proximityInfo;
}
-static void latinime_Keyboard_release(JNIEnv *env, jobject object, jint proximityInfo) {
+static void latinime_Keyboard_release(JNIEnv *env, jobject object, jlong proximityInfo) {
ProximityInfo *pi = (ProximityInfo*)proximityInfo;
if (!pi) return;
delete pi;
}
static JNINativeMethod sKeyboardMethods[] = {
- {"setProximityInfoNative", "(IIIII[II[I[I[I[I[I[F[F[F)I",
+ {"setProximityInfoNative", "(IIIII[II[I[I[I[I[I[F[F[F)J",
(void*)latinime_Keyboard_setProximityInfo},
- {"releaseProximityInfoNative", "(I)V", (void*)latinime_Keyboard_release}
+ {"releaseProximityInfoNative", "(J)V", (void*)latinime_Keyboard_release}
};
int register_ProximityInfo(JNIEnv *env) {
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 18c972444..71a893ca7 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -18,6 +18,7 @@
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "binary_format.h"
+#include "correction.h"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include "dictionary.h"
#include "jni.h"
@@ -33,6 +34,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <unistd.h>
#else // USE_MMAP_FOR_DICTIONARY
#include <stdlib.h>
#endif // USE_MMAP_FOR_DICTIONARY
@@ -41,19 +43,19 @@ namespace latinime {
void releaseDictBuf(void* dictBuf, const size_t length, int fd);
-static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
+static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
jstring sourceDir, jlong dictOffset, jlong dictSize,
jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords,
jint maxAlternatives) {
PROF_OPEN;
PROF_START(66);
- const char *sourceDirChars = env->GetStringUTFChars(sourceDir, NULL);
- if (sourceDirChars == NULL) {
+ const char *sourceDirChars = env->GetStringUTFChars(sourceDir, 0);
+ if (sourceDirChars == 0) {
LOGE("DICT: Can't get sourceDir string");
return 0;
}
int fd = 0;
- void *dictBuf = NULL;
+ void *dictBuf = 0;
int adjust = 0;
#ifdef USE_MMAP_FOR_DICTIONARY
/* mmap version */
@@ -66,7 +68,7 @@ static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
adjust = dictOffset % pagesize;
int adjDictOffset = dictOffset - adjust;
int adjDictSize = dictSize + adjust;
- dictBuf = mmap(NULL, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
+ dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
LOGE("DICT: Can't mmap dictionary. errno=%d", errno);
return 0;
@@ -74,9 +76,9 @@ static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
dictBuf = (void *)((char *)dictBuf + adjust);
#else // USE_MMAP_FOR_DICTIONARY
/* malloc version */
- FILE *file = NULL;
+ FILE *file = 0;
file = fopen(sourceDirChars, "rb");
- if (file == NULL) {
+ if (file == 0) {
LOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
return 0;
}
@@ -107,7 +109,7 @@ static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
LOGE("DICT: dictBuf is null");
return 0;
}
- Dictionary *dictionary = NULL;
+ Dictionary *dictionary = 0;
if (BinaryFormat::UNKNOWN_FORMAT == BinaryFormat::detectFormat((uint8_t*)dictBuf)) {
LOGE("DICT: dictionary format is unknown, bad magic number");
#ifdef USE_MMAP_FOR_DICTIONARY
@@ -121,23 +123,23 @@ static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
}
PROF_END(66);
PROF_CLOSE;
- return (jint)dictionary;
+ return (jlong)dictionary;
}
-static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jint dict,
- jint proximityInfo, jintArray xCoordinatesArray, jintArray yCoordinatesArray,
+static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jlong dict,
+ jlong proximityInfo, jintArray xCoordinatesArray, jintArray yCoordinatesArray,
jintArray inputArray, jint arraySize, jint flags,
jcharArray outputArray, jintArray frequencyArray) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return 0;
ProximityInfo *pInfo = (ProximityInfo*)proximityInfo;
- int *xCoordinates = env->GetIntArrayElements(xCoordinatesArray, NULL);
- int *yCoordinates = env->GetIntArrayElements(yCoordinatesArray, NULL);
+ int *xCoordinates = env->GetIntArrayElements(xCoordinatesArray, 0);
+ int *yCoordinates = env->GetIntArrayElements(yCoordinatesArray, 0);
- int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
- int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
- jchar *outputChars = env->GetCharArrayElements(outputArray, NULL);
+ int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
+ int *inputCodes = env->GetIntArrayElements(inputArray, 0);
+ jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
int count = dictionary->getSuggestions(pInfo, xCoordinates, yCoordinates, inputCodes,
arraySize, flags, (unsigned short*) outputChars, frequencies);
@@ -151,17 +153,17 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
return count;
}
-static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jint dict,
+static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict,
jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams,
jint maxAlternatives) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return 0;
- jchar *prevWord = env->GetCharArrayElements(prevWordArray, NULL);
- int *inputCodes = env->GetIntArrayElements(inputArray, NULL);
- jchar *outputChars = env->GetCharArrayElements(outputArray, NULL);
- int *frequencies = env->GetIntArrayElements(frequencyArray, NULL);
+ jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0);
+ int *inputCodes = env->GetIntArrayElements(inputArray, 0);
+ jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
+ int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes,
inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams,
@@ -175,19 +177,42 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jin
return count;
}
-static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jint dict,
+static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
jcharArray wordArray, jint wordLength) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return (jboolean) false;
- jchar *word = env->GetCharArrayElements(wordArray, NULL);
+ jchar *word = env->GetCharArrayElements(wordArray, 0);
jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength);
env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
return result;
}
-static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jint dict) {
+static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
+ jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
+ jchar *beforeChars = env->GetCharArrayElements(before, 0);
+ jchar *afterChars = env->GetCharArrayElements(after, 0);
+ jdouble result = Correction::RankingAlgorithm::calcNormalizedScore(
+ (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength,
+ score);
+ env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
+ env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
+ return result;
+}
+
+static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
+ jcharArray before, jint beforeLength, jcharArray after, jint afterLength) {
+ jchar *beforeChars = env->GetCharArrayElements(before, 0);
+ jchar *afterChars = env->GetCharArrayElements(after, 0);
+ jint result = Correction::RankingAlgorithm::editDistance(
+ (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength);
+ env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
+ env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
+ return result;
+}
+
+static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
Dictionary *dictionary = (Dictionary*)dict;
if (!dictionary) return;
void *dictBuf = dictionary->getDict();
@@ -217,11 +242,14 @@ void releaseDictBuf(void* dictBuf, const size_t length, int fd) {
}
static JNINativeMethod sMethods[] = {
- {"openNative", "(Ljava/lang/String;JJIIIII)I", (void*)latinime_BinaryDictionary_open},
- {"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
- {"getSuggestionsNative", "(II[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
- {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
- {"getBigramsNative", "(I[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}
+ {"openNative", "(Ljava/lang/String;JJIIIII)J", (void*)latinime_BinaryDictionary_open},
+ {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
+ {"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
+ {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
+ {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams},
+ {"calcNormalizedScoreNative", "([CI[CII)D",
+ (void*)latinime_BinaryDictionary_calcNormalizedScore},
+ {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
};
int register_BinaryDictionary(JNIEnv *env) {
diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp
index 8643f723f..958abfd67 100644
--- a/native/jni/jni_common.cpp
+++ b/native/jni/jni_common.cpp
@@ -32,14 +32,14 @@ using namespace latinime;
* Returns the JNI version on success, -1 on failure.
*/
jint JNI_OnLoad(JavaVM* vm, void* reserved) {
- JNIEnv* env = NULL;
+ JNIEnv* env = 0;
jint result = -1;
if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
LOGE("ERROR: GetEnv failed");
goto bail;
}
- assert(env != NULL);
+ assert(env != 0);
if (!register_BinaryDictionary(env)) {
LOGE("ERROR: BinaryDictionary native registration failed");
@@ -63,7 +63,7 @@ namespace latinime {
int registerNativeMethods(JNIEnv* env, const char* className, JNINativeMethod* methods,
int numMethods) {
jclass clazz = env->FindClass(className);
- if (clazz == NULL) {
+ if (clazz == 0) {
LOGE("Native registration unable to find class '%s'", className);
return JNI_FALSE;
}
diff --git a/native/jni/jni_common.h b/native/jni/jni_common.h
index 9548e1b3f..6741443ac 100644
--- a/native/jni/jni_common.h
+++ b/native/jni/jni_common.h
@@ -29,17 +29,17 @@ int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *m
inline jint *safeGetIntArrayElements(JNIEnv *env, jintArray jArray) {
if (jArray) {
- return env->GetIntArrayElements(jArray, NULL);
+ return env->GetIntArrayElements(jArray, 0);
} else {
- return NULL;
+ return 0;
}
}
inline jfloat *safeGetFloatArrayElements(JNIEnv *env, jfloatArray jArray) {
if (jArray) {
- return env->GetFloatArrayElements(jArray, NULL);
+ return env->GetFloatArrayElements(jArray, 0);
} else {
- return NULL;
+ return 0;
}
}
diff --git a/native/src/basechars.h b/native/src/basechars.cpp
index 3843e11c5..31f1e18a8 100644
--- a/native/src/basechars.h
+++ b/native/src/basechars.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#ifndef LATINIME_BASECHARS_H
-#define LATINIME_BASECHARS_H
+#include "char_utils.h"
+
+namespace latinime {
/**
* Table mapping most combined Latin, Greek, and Cyrillic characters
@@ -23,7 +24,7 @@
* if c is not a combined character, or the base character if it
* is combined.
*/
-static unsigned short BASE_CHARS[] = {
+const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
@@ -189,4 +190,5 @@ static unsigned short BASE_CHARS[] = {
// generated with:
// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
-#endif // LATINIME_BASECHARS_H
+
+} // namespace latinime
diff --git a/native/src/bigram_dictionary.h b/native/src/bigram_dictionary.h
index c07458a38..585a1866a 100644
--- a/native/src/bigram_dictionary.h
+++ b/native/src/bigram_dictionary.h
@@ -21,14 +21,14 @@ namespace latinime {
class Dictionary;
class BigramDictionary {
-public:
+ public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxAlternatives,
const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
~BigramDictionary();
-private:
+ private:
bool addWordBigram(unsigned short *word, int length, int frequency);
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
diff --git a/native/src/binary_format.h b/native/src/binary_format.h
index 6f65088db..9944fa2bd 100644
--- a/native/src/binary_format.h
+++ b/native/src/binary_format.h
@@ -22,12 +22,12 @@
namespace latinime {
class BinaryFormat {
-private:
+ private:
const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
-public:
+ public:
const static int UNKNOWN_FORMAT = -1;
const static int FORMAT_VERSION_1 = 1;
const static uint16_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B1;
@@ -145,15 +145,15 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
const int pos) {
- // This function skips all attributes. The format makes provision for future extension
- // with other attributes (notably shortcuts) but for the time being, bigrams are the
- // only attributes that may be found in a character group, so we only look at bigrams
- // in this version.
+ // This function skips all attributes: shortcuts and bigrams.
+ int newPos = pos;
+ if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
+ newPos = skipAttributes(dict, newPos);
+ }
if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
- return skipAttributes(dict, pos);
- } else {
- return pos;
+ newPos = skipAttributes(dict, newPos);
}
+ return newPos;
}
inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t* const dict,
diff --git a/native/src/char_utils.h b/native/src/char_utils.h
index a69a35e7a..607dc5195 100644
--- a/native/src/char_utils.h
+++ b/native/src/char_utils.h
@@ -19,8 +19,47 @@
namespace latinime {
+inline static int isAsciiUpper(unsigned short c) {
+ return c >= 'A' && c <= 'Z';
+}
+
+inline static unsigned short toAsciiLower(unsigned short c) {
+ return c - 'A' + 'a';
+}
+
+inline static int isAscii(unsigned short c) {
+ return c <= 127;
+}
+
unsigned short latin_tolower(unsigned short c);
+/**
+ * Table mapping most combined Latin, Greek, and Cyrillic characters
+ * to their base characters. If c is in range, BASE_CHARS[c] == c
+ * if c is not a combined character, or the base character if it
+ * is combined.
+ */
+
+static const int BASE_CHARS_SIZE = 0x0500;
+extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
+
+inline static unsigned short toBaseChar(unsigned short c) {
+ if (c < BASE_CHARS_SIZE) {
+ return BASE_CHARS[c];
+ }
+ return c;
+}
+
+inline static unsigned short toBaseLowerCase(unsigned short c) {
+ c = toBaseChar(c);
+ if (isAsciiUpper(c)) {
+ return toAsciiLower(c);
+ } else if (isAscii(c)) {
+ return c;
+ }
+ return latin_tolower(c);
+}
+
} // namespace latinime
#endif // LATINIME_CHAR_UTILS_H
diff --git a/native/src/correction.cpp b/native/src/correction.cpp
index 27dc40745..dc31bfae7 100644
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@@ -16,11 +16,13 @@
#include <assert.h>
#include <ctype.h>
+#include <math.h>
#include <stdio.h>
#include <string.h>
#define LOG_TAG "LatinIME: correction.cpp"
+#include "char_utils.h"
#include "correction.h"
#include "dictionary.h"
#include "proximity_info.h"
@@ -31,73 +33,49 @@ namespace latinime {
// edit distance funcitons //
/////////////////////////////
-#if 0 /* no longer used */
-inline static int editDistance(
- int* editDistanceTable, const unsigned short* input,
- const int inputLength, const unsigned short* output, const int outputLength) {
- // dp[li][lo] dp[a][b] = dp[ a * lo + b]
- int* dp = editDistanceTable;
- const int li = inputLength + 1;
- const int lo = outputLength + 1;
- for (int i = 0; i < li; ++i) {
- dp[lo * i] = i;
- }
- for (int i = 0; i < lo; ++i) {
- dp[i] = i;
- }
-
- for (int i = 0; i < li - 1; ++i) {
- for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i]);
- const uint32_t co = Dictionary::toBaseLowerCase(output[j]);
- const uint16_t cost = (ci == co) ? 0 : 1;
- dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
- min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
- if (i > 0 && j > 0 && ci == Dictionary::toBaseLowerCase(output[j - 1])
- && co == Dictionary::toBaseLowerCase(input[i - 1])) {
- dp[(i + 1) * lo + (j + 1)] = min(
- dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
- }
- }
+inline static void initEditDistance(int *editDistanceTable) {
+ for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
+ editDistanceTable[i] = i;
}
+}
- if (DEBUG_EDIT_DISTANCE) {
- LOGI("IN = %d, OUT = %d", inputLength, outputLength);
- for (int i = 0; i < li; ++i) {
- for (int j = 0; j < lo; ++j) {
- LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength,
+ const int outputLength) {
+ if (DEBUG_DICT) {
+ LOGI("EditDistanceTable");
+ for (int i = 0; i <= 10; ++i) {
+ int c[11];
+ for (int j = 0; j <= 10; ++j) {
+ if (j < inputLength + 1 && i < outputLength + 1) {
+ c[j] = (editDistanceTable + i * (inputLength + 1))[j];
+ } else {
+ c[j] = -1;
+ }
}
+ LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
+ c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
}
}
- return dp[li * lo - 1];
-}
-#endif
-
-inline static void initEditDistance(int *editDistanceTable) {
- for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
- editDistanceTable[i] = i;
- }
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputLength, const unsigned short *output, const int outputLength) {
+ // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated,
// and calculate dp[ouputLength][0] ... dp[outputLength][inputLength].
int *const current = editDistanceTable + outputLength * (inputLength + 1);
const int *const prev = editDistanceTable + (outputLength - 1) * (inputLength + 1);
const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : NULL;
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputLength + 1) : 0;
current[0] = outputLength;
- const uint32_t co = Dictionary::toBaseLowerCase(output[outputLength - 1]);
- const uint32_t prevCO =
- outputLength >= 2 ? Dictionary::toBaseLowerCase(output[outputLength - 2]) : 0;
+ const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
+ const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
for (int i = 1; i <= inputLength; ++i) {
- const uint32_t ci = Dictionary::toBaseLowerCase(input[i - 1]);
+ const uint32_t ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
- if (i >= 2 && prevprev && ci == prevCO
- && co == Dictionary::toBaseLowerCase(input[i - 2])) {
+ if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
current[i] = min(current[i], prevprev[i - 2] + 1);
}
}
@@ -105,6 +83,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
inline static int getCurrentEditDistance(
int *editDistanceTable, const int inputLength, const int outputLength) {
+ if (DEBUG_DICT) {
+ LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength);
+ }
return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1];
}
@@ -133,6 +114,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength,
mInputLength = inputLength;
mMaxDepth = maxDepth;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+ // TODO: This is not supposed to be required. Check what's going wrong with
+ // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
+ initEditDistance(mEditDistanceTable);
}
void Correction::initCorrectionState(
@@ -146,7 +130,7 @@ void Correction::initCorrectionState(
void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
- const bool useFullEditDistance) {
+ const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
// TODO: remove
mTransposedPos = transposedPos;
mExcessivePos = excessivePos;
@@ -159,6 +143,8 @@ void Correction::setCorrectionParams(const int skipPos, const int excessivePos,
mSpaceProximityPos = spaceProximityPos;
mMissingSpacePos = missingSpacePos;
mUseFullEditDistance = useFullEditDistance;
+ mDoAutoCompletion = doAutoCompletion;
+ mMaxErrors = maxErrors;
}
void Correction::checkState() {
@@ -280,7 +266,9 @@ void Correction::startToTraverseAllNodes() {
bool Correction::needsToPrune() const {
// TODO: use edit distance here
- return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance;
+ return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
+ // Allow one char longer word for missing character
+ || (!mDoAutoCompletion && (mOutputIndex + 1 >= mInputLength));
}
void Correction::addCharToCurrentWord(const int32_t c) {
@@ -312,12 +300,17 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
+ if (correctionCount > mMaxErrors) {
+ return UNRELATED;
+ }
+
// TODO: Change the limit if we'll allow two or more corrections
const bool noCorrectionsHappenedSoFar = correctionCount == 0;
const bool canTryCorrection = noCorrectionsHappenedSoFar;
int proximityIndex = 0;
mDistances[mOutputIndex] = NOT_A_DISTANCE;
+ // Skip checking this node
if (mNeedsToTraverseAllNodes || isQuote(c)) {
bool incremented = false;
if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
@@ -342,6 +335,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
return processSkipChar(c, isTerminal, incremented);
}
+ // Check possible corrections.
if (mExcessivePos >= 0) {
if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
mExcessivePos = mOutputIndex;
@@ -392,7 +386,12 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
// TODO: Change the limit if we'll allow two or more proximity chars with corrections
- const bool checkProximityChars = noCorrectionsHappenedSoFar || mProximityCount == 0;
+ // Work around: When the mMaxErrors is 1, we only allow just one error
+ // including proximity correction.
+ const bool checkProximityChars = (mMaxErrors > 1)
+ ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
+ : (noCorrectionsHappenedSoFar && mProximityCount == 0);
+
ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
? ProximityInfo::EQUIVALENT_CHAR
: mProximityInfo->getMatchedProximityId(
@@ -607,13 +606,7 @@ inline static int getQuoteCount(const unsigned short* word, const int length) {
}
inline static bool isUpperCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (isupper(c)) {
- return true;
- }
- return false;
+ return isAsciiUpper(toBaseChar(c));
}
//////////////////////
@@ -654,6 +647,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
// TODO: Calculate edit distance for transposed and excessive
int ed = 0;
+ if (DEBUG_DICT_FULL) {
+ dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1);
+ }
int adjustedProximityMatchedCount = proximityMatchedCount;
int finalFreq = freq;
@@ -938,4 +934,103 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
return totalFreq;
}
+/* Damerau-Levenshtein distance */
+inline static int editDistanceInternal(
+ int* editDistanceTable, const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ // dp[li][lo] dp[a][b] = dp[ a * lo + b]
+ int* dp = editDistanceTable;
+ const int li = beforeLength + 1;
+ const int lo = afterLength + 1;
+ for (int i = 0; i < li; ++i) {
+ dp[lo * i] = i;
+ }
+ for (int i = 0; i < lo; ++i) {
+ dp[i] = i;
+ }
+
+ for (int i = 0; i < li - 1; ++i) {
+ for (int j = 0; j < lo - 1; ++j) {
+ const uint32_t ci = toBaseLowerCase(before[i]);
+ const uint32_t co = toBaseLowerCase(after[j]);
+ const uint16_t cost = (ci == co) ? 0 : 1;
+ dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
+ min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
+ if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
+ && co == toBaseLowerCase(before[i - 1])) {
+ dp[(i + 1) * lo + (j + 1)] = min(
+ dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
+ }
+ }
+ }
+
+ if (DEBUG_EDIT_DISTANCE) {
+ LOGI("IN = %d, OUT = %d", beforeLength, afterLength);
+ for (int i = 0; i < li; ++i) {
+ for (int j = 0; j < lo; ++j) {
+ LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
+ }
+ }
+ }
+ return dp[li * lo - 1];
+}
+
+int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength) {
+ int table[(beforeLength + 1) * (afterLength + 1)];
+ return editDistanceInternal(table, before, beforeLength, after, afterLength);
+}
+
+
+// In dictionary.cpp, getSuggestion() method,
+// suggestion scores are computed using the below formula.
+// original score
+// := pow(mTypedLetterMultiplier (this is defined 2),
+// (the number of matched characters between typed word and suggested word))
+// * (individual word's score which defined in the unigram dictionary,
+// and this score is defined in range [0, 255].)
+// Then, the following processing is applied.
+// - If the dictionary word is matched up to the point of the user entry
+// (full match up to min(before.length(), after.length())
+// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
+// - If the word is a true full match except for differences in accents or
+// capitalization, then treat it as if the score was 255.
+// - If before.length() == after.length()
+// => multiply by mFullWordMultiplier (this is defined 2))
+// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
+// For historical reasons we ignore the 1.2 modifier (because the measure for a good
+// autocorrection threshold was done at a time when it didn't exist). This doesn't change
+// the result.
+// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
+
+/* static */
+double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength,
+ const int score) {
+ if (0 == beforeLength || 0 == afterLength) {
+ return 0;
+ }
+ const int distance = editDistance(before, beforeLength, after, afterLength);
+ int spaceCount = 0;
+ for (int i = 0; i < afterLength; ++i) {
+ if (after[i] == CODE_SPACE) {
+ ++spaceCount;
+ }
+ }
+
+ if (spaceCount == afterLength) {
+ return 0;
+ }
+
+ const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
+ * pow((double)TYPED_LETTER_MULTIPLIER,
+ (double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
+
+ // add a weight based on edit distance.
+ // distance <= max(afterLength, beforeLength) == afterLength,
+ // so, 0 <= distance / afterLength <= 1
+ const double weight = 1.0 - (double) distance / afterLength;
+ return (score / maxScore) * weight;
+}
+
} // namespace latinime
diff --git a/native/src/correction.h b/native/src/correction.h
index d4e99f0ce..4012e7e82 100644
--- a/native/src/correction.h
+++ b/native/src/correction.h
@@ -27,8 +27,7 @@ namespace latinime {
class ProximityInfo;
class Correction {
-
-public:
+ public:
typedef enum {
TRAVERSE_ALL_ON_TERMINAL,
TRAVERSE_ALL_NOT_ON_TERMINAL,
@@ -44,7 +43,8 @@ public:
// TODO: remove
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
- const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance);
+ const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
+ const bool doAutoCompletion, const int maxErrors);
void checkState();
bool initProcessState(const int index);
@@ -94,7 +94,25 @@ public:
inline int getTreeParentIndex(const int index) const {
return mCorrectionStates[index].mParentIndex;
}
-private:
+
+ class RankingAlgorithm {
+ public:
+ static int calculateFinalFreq(const int inputIndex, const int depth,
+ const int freq, int *editDistanceTable, const Correction* correction);
+ static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
+ const Correction* correction, const unsigned short *word);
+ static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
+ const unsigned short* after, const int afterLength, const int score);
+ static int editDistance(const unsigned short* before,
+ const int beforeLength, const unsigned short* after, const int afterLength);
+ private:
+ static const int CODE_SPACE = ' ';
+ static const int MAX_INITIAL_SCORE = 255;
+ static const int TYPED_LETTER_MULTIPLIER = 2;
+ static const int FULL_WORD_MULTIPLIER = 2;
+ };
+
+ private:
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline bool needsToTraverseAllNodes();
@@ -109,6 +127,7 @@ private:
const ProximityInfo *mProximityInfo;
bool mUseFullEditDistance;
+ bool mDoAutoCompletion;
int mMaxEditDistance;
int mMaxDepth;
int mInputLength;
@@ -116,6 +135,7 @@ private:
int mMissingSpacePos;
int mTerminalInputIndex;
int mTerminalOutputIndex;
+ int mMaxErrors;
// The following arrays are state buffer.
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
@@ -150,13 +170,6 @@ private:
bool mTransposing;
bool mSkipping;
- class RankingAlgorithm {
- public:
- static int calculateFinalFreq(const int inputIndex, const int depth,
- const int freq, int *editDistanceTable, const Correction* correction);
- static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
- const Correction* correction, const unsigned short *word);
- };
};
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/native/src/defines.h b/native/src/defines.h
index ef1beb92f..d636e5197 100644
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -98,9 +98,10 @@ static void prof_out(void) {
#define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL
-#define DEBUG_PROXIMITY_INFO true
+#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ true
+#define DEBUG_WORDS_PRIORITY_QUEUE true
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
@@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
+#define DEBUG_WORDS_PRIORITY_QUEUE false
#define DUMP_WORD(word, length)
@@ -196,10 +198,14 @@ static void dumpWord(const unsigned short* word, const int length) {
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
-// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
+// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
#define MAX_WORD_LENGTH_INTERNAL 48
+// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
+// for better performance.
+#define SUB_QUEUE_MAX_WORDS 5
+
#define MAX_DEPTH_MULTIPLIER 3
// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
index a49769bdb..e3673d425 100644
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@@ -38,6 +38,9 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
LOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
}
}
+ mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
+ mWordsPriorityQueuePool = new WordsPriorityQueuePool(
+ maxWords, SUB_QUEUE_MAX_WORDS, maxWordLength);
mUnigramDictionary = new UnigramDictionary(mDict, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
mBigramDictionary = new BigramDictionary(mDict, maxWordLength, maxAlternatives,
@@ -45,6 +48,8 @@ Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
}
Dictionary::~Dictionary() {
+ delete mCorrection;
+ delete mWordsPriorityQueuePool;
delete mUnigramDictionary;
delete mBigramDictionary;
}
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
index d5de0083a..79d377a4f 100644
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@@ -17,22 +17,25 @@
#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H
-#include "basechars.h"
#include "bigram_dictionary.h"
#include "char_utils.h"
+#include "correction.h"
#include "defines.h"
#include "proximity_info.h"
#include "unigram_dictionary.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
class Dictionary {
-public:
+ public:
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
+
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
int *codes, int codesSize, int flags, unsigned short *outWords, int *frequencies) {
- return mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
+ mCorrection, xcoordinates, ycoordinates, codes,
codesSize, flags, outWords, frequencies);
}
@@ -63,9 +66,8 @@ public:
static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
const int pos, unsigned short *c, int *childrenPosition,
bool *terminal, int *freq);
- static inline unsigned short toBaseLowerCase(unsigned short c);
-private:
+ private:
bool hasBigram();
const unsigned char *mDict;
@@ -79,6 +81,8 @@ private:
const bool IS_LATEST_DICT_VERSION;
UnigramDictionary *mUnigramDictionary;
BigramDictionary *mBigramDictionary;
+ WordsPriorityQueuePool *mWordsPriorityQueuePool;
+ Correction *mCorrection;
};
// public static utility methods
@@ -156,19 +160,6 @@ inline int Dictionary::setDictionaryValues(const unsigned char *dict,
return position;
}
-
-inline unsigned short Dictionary::toBaseLowerCase(unsigned short c) {
- if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
- c = BASE_CHARS[c];
- }
- if (c >='A' && c <= 'Z') {
- c |= 32;
- } else if (c > 127) {
- c = latin_tolower(c);
- }
- return c;
-}
-
} // namespace latinime
#endif // LATINIME_DICTIONARY_H
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index 763a3a174..95e35263b 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -47,7 +47,7 @@ ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboard
HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
&& keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
&& sweetSpotCenterYs && sweetSpotRadii),
- mInputXCoordinates(NULL), mInputYCoordinates(NULL),
+ mInputXCoordinates(0), mInputYCoordinates(0),
mTouchPositionCorrectionEnabled(false) {
const int proximityGridLength = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
mProximityCharsArray = new uint32_t[proximityGridLength];
@@ -100,9 +100,17 @@ inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y)
}
bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
+ if (x < 0 || y < 0) {
+ if (DEBUG_DICT) {
+ LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
+ assert(false);
+ }
+ return false;
+ }
+
const int startIndex = getStartIndexFromCoordinates(x, y);
if (DEBUG_PROXIMITY_INFO) {
- LOGI("hasSpaceProximity: index %d", startIndex);
+ LOGI("hasSpaceProximity: index %d, %d, %d", startIndex, x, y);
}
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
@@ -167,7 +175,7 @@ int ProximityInfo::getKeyIndex(const int c) const {
// We do not have the coordinate data
return NOT_A_INDEX;
}
- const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+ const unsigned short baseLowerC = toBaseLowerCase(c);
if (baseLowerC > MAX_CHAR_CODE) {
return NOT_A_INDEX;
}
@@ -232,7 +240,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde
const unsigned short c, const bool checkProximityChars, int *proximityIndex) const {
const int *currentChars = getProximityCharsAt(index);
const int firstChar = currentChars[0];
- const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
+ const unsigned short baseLowerC = toBaseLowerCase(c);
// The first char in the array is what user typed. If it matches right away,
// that means the user typed that same char for this pos.
@@ -245,7 +253,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int inde
// If the non-accented, lowercased version of that first character matches c,
// then we have a non-accented version of the accented character the user
// typed. Treat it as a close char.
- if (Dictionary::toBaseLowerCase(firstChar) == baseLowerC)
+ if (toBaseLowerCase(firstChar) == baseLowerC)
return NEAR_PROXIMITY_CHAR;
// Not an exact nor an accent-alike match: search the list of close keys
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index 35e354c6e..9ca5505a7 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -26,7 +26,7 @@ namespace latinime {
class Correction;
class ProximityInfo {
-public:
+ public:
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2 = 10;
static const int NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR =
1 << NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
@@ -56,7 +56,7 @@ public:
bool existsCharInProximityAt(const int index, const int c) const;
bool existsAdjacentProximityChars(const int index) const;
ProximityType getMatchedProximityId(const int index, const unsigned short c,
- const bool checkProximityChars, int *proximityIndex = NULL) const;
+ const bool checkProximityChars, int *proximityIndex = 0) const;
int getNormalizedSquaredDistance(const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[inputIndex * MAX_PROXIMITY_CHARS_SIZE + proximityIndex];
}
@@ -68,7 +68,7 @@ public:
return mTouchPositionCorrectionEnabled;
}
-private:
+ private:
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex
diff --git a/native/src/terminal_attributes.h b/native/src/terminal_attributes.h
new file mode 100644
index 000000000..1f9815936
--- /dev/null
+++ b/native/src/terminal_attributes.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_ATTRIBUTES_H
+#define LATINIME_TERMINAL_ATTRIBUTES_H
+
+#include "unigram_dictionary.h"
+
+namespace latinime {
+
+/**
+ * This class encapsulates information about a terminal that allows to
+ * retrieve local node attributes like the list of shortcuts without
+ * exposing the format structure to the client.
+ */
+class TerminalAttributes {
+ public:
+ class ShortcutIterator {
+ const uint8_t* const mDict;
+ bool mHasNextShortcutTarget;
+ int mPos;
+
+ public:
+ ShortcutIterator(const uint8_t* dict, const int pos, const uint8_t flags) : mDict(dict),
+ mPos(pos) {
+ mHasNextShortcutTarget = (0 != (flags & UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS));
+ }
+
+ inline bool hasNextShortcutTarget() const {
+ return mHasNextShortcutTarget;
+ }
+
+ // Gets the shortcut target itself as a uint16_t string. For parameters and return value
+ // see BinaryFormat::getWordAtAddress.
+ inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
+ const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
+ mHasNextShortcutTarget =
+ 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
+ int shortcutAddress =
+ BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos);
+ return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord);
+ }
+ };
+
+ private:
+ const uint8_t* const mDict;
+ const uint8_t mFlags;
+ const int mStartPos;
+
+ public:
+ TerminalAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) :
+ mDict(dict), mFlags(flags), mStartPos(pos) {
+ }
+
+ inline bool isShortcutOnly() const {
+ return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY);
+ }
+
+ inline ShortcutIterator getShortcutIterator() const {
+ return ShortcutIterator(mDict, mStartPos, mFlags);
+ }
+};
+} // namespace latinime
+
+#endif // LATINIME_TERMINAL_ATTRIBUTES_H
diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 8eb5a9700..e95e03ce5 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -25,6 +25,7 @@
#include "unigram_dictionary.h"
#include "binary_format.h"
+#include "terminal_attributes.h"
namespace latinime {
@@ -48,19 +49,23 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
if (DEBUG_DICT) {
LOGI("UnigramDictionary - constructor");
}
- mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
}
UnigramDictionary::~UnigramDictionary() {
- delete mCorrection;
}
-static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
+static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize,
const int MAX_PROXIMITY_CHARS) {
return sizeof(*codes) * MAX_PROXIMITY_CHARS * codesSize;
}
-bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codesSize) const {
+// TODO: This needs to take an const unsigned short* and not tinker with its contents
+static inline void addWord(
+ unsigned short *word, int length, int frequency, WordsPriorityQueue *queue) {
+ queue->push(frequency, word, length);
+}
+
+bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codesSize) const {
// There can't be a digraph if we don't have at least 2 characters to examine
if (i + 2 > codesSize) return false;
@@ -86,9 +91,10 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
// codesSrc is the current point in the user-input, original, content-unmodified buffer.
// codesRemain is the remaining size in codesSrc.
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
- const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
+ const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
+ const int codesBufferSize, const int flags, const int *codesSrc,
+ const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) {
@@ -105,8 +111,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, codesBufferSize, flags,
codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
- currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,
- frequencies);
+ currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction,
+ queuePool);
// Copy the second char of the digraph in place, then continue processing on
// the remaining part of the word.
@@ -114,9 +120,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest + i * MAX_PROXIMITY_CHARS, codesSrc + i * MAX_PROXIMITY_CHARS,
BYTES_IN_ONE_CHAR);
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
- codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,
- codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,
- outWords, frequencies);
+ codesBuffer, codesBufferSize, flags,
+ codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1,
+ codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool);
return;
}
}
@@ -132,40 +138,39 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest, codesSrc, remainingBytes);
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies,
- flags);
+ (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags, correction,
+ queuePool);
}
-int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
+ WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies) {
+ Correction* masterCorrection = correction;
if (REQUIRES_GERMAN_UMLAUT_PROCESSING & flags)
{ // Incrementally tune the word and try all possibilities
int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);
+ codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool);
} else { // Normal processing
- getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
- outWords, frequencies, flags);
+ getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags,
+ masterCorrection, queuePool);
}
PROF_START(20);
- // Get the word count
- int suggestedWordsCount = 0;
- while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
- suggestedWordsCount++;
- }
+ const int suggestedWordsCount =
+ queuePool->getMasterQueue()->outputSuggestions(frequencies, outWords);
if (DEBUG_DICT) {
LOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
#ifdef FLAG_DBG
- short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
+ short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
- LOGI("%s %i", s, mFrequencies[j]);
+ LOGI("%s %i", s, frequencies[j]);
#endif
}
}
@@ -175,23 +180,19 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
}
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies, const int flags) {
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const int inputLength, const int flags, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
PROF_OPEN;
PROF_START(0);
- initSuggestions(
- proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
- if (DEBUG_DICT) assert(codesSize == mInputLength);
-
- const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
- mCorrection->initCorrection(mProximityInfo, mInputLength, maxDepth);
+ // Note: This line is intentionally left blank
PROF_END(0);
- const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
- // TODO: remove
PROF_START(1);
- getSuggestionCandidates(useFullEditDistance);
+ const bool useFullEditDistance = USE_FULL_EDIT_DISTANCE & flags;
+ getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance,
+ inputLength, correction, queuePool);
PROF_END(1);
PROF_START(2);
@@ -209,12 +210,13 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(5);
// Suggestions with missing space
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
- && mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
- for (int i = 1; i < codesSize; ++i) {
+ && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
+ for (int i = 1; i < inputLength; ++i) {
if (DEBUG_DICT) {
LOGI("--- Suggest missing space characters %d", i);
}
- getMissingSpaceWords(mInputLength, i, mCorrection, useFullEditDistance);
+ getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, i, correction, queuePool);
}
}
PROF_END(5);
@@ -222,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_START(6);
if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
// The first and last "mistyped spaces" are taken care of by excessive character handling
- for (int i = 1; i < codesSize - 1; ++i) {
+ for (int i = 1; i < inputLength - 1; ++i) {
if (DEBUG_DICT) {
LOGI("--- Suggest words with proximity space %d", i);
}
@@ -233,7 +235,8 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
i, x, y, proximityInfo->hasSpaceProximity(x, y));
}
if (proximityInfo->hasSpaceProximity(x, y)) {
- getMistypedSpaceWords(mInputLength, i, mCorrection, useFullEditDistance);
+ getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, i, correction, queuePool);
}
}
}
@@ -241,153 +244,118 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies) {
+ const int *yCoordinates, const int *codes, const int inputLength,
+ WordsPriorityQueue *queue, Correction *correction) {
if (DEBUG_DICT) {
LOGI("initSuggest");
}
- mFrequencies = frequencies;
- mOutputChars = outWords;
- mInputLength = codesSize;
- proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates);
- mProximityInfo = proximityInfo;
-}
-
-static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
- if (c < nextLettersSize) {
- nextLetters[c]++;
- }
-}
-
-// TODO: We need to optimize addWord by using STL or something
-// TODO: This needs to take an const unsigned short* and not tinker with its contents
-bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
- word[length] = 0;
- if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Found word = %s, freq = %d", s, frequency);
-#endif
- }
- if (length > MAX_WORD_LENGTH) {
- if (DEBUG_DICT) {
- LOGI("Exceeded max word length.");
- }
- return false;
- }
-
- // Find the right insertion point
- int insertAt = 0;
- while (insertAt < MAX_WORDS) {
- // TODO: How should we sort words with the same frequency?
- if (frequency > mFrequencies[insertAt]) {
- break;
- }
- insertAt++;
- }
- if (insertAt < MAX_WORDS) {
- if (DEBUG_DICT) {
-#ifdef FLAG_DBG
- char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
- LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
-#endif
- }
- memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
- (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
- (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
- mFrequencies[insertAt] = frequency;
- memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
- (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
- (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
- unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
- while (length--) {
- *dest++ = *word++;
- }
- *dest = 0; // NULL terminate
- if (DEBUG_DICT) {
- LOGI("Added word at %d", insertAt);
- }
- return true;
+ proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
+ if (queue) {
+ queue->clear();
}
- return false;
+ const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
+ correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
static const char QUOTE = '\'';
static const char SPACE = ' ';
-void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance) {
+void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, Correction *correction,
+ WordsPriorityQueuePool *queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+ initSuggestions(
+ proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
+ getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
+ true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
+}
+
+void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
+ const int inputLength, Correction *correction, WordsPriorityQueue *queue,
+ const bool doAutoCompletion, const int maxErrors) {
// TODO: Remove setCorrectionParams
- mCorrection->setCorrectionParams(0, 0, 0,
- -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance);
+ correction->setCorrectionParams(0, 0, 0,
+ -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
+ doAutoCompletion, maxErrors);
int rootPosition = ROOT_POS;
// Get the number of children of root, then increment the position
int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
int outputIndex = 0;
- mCorrection->initCorrectionState(rootPosition, childCount, (mInputLength <= 0));
+ correction->initCorrectionState(rootPosition, childCount, (inputLength <= 0));
// Depth first search
while (outputIndex >= 0) {
- if (mCorrection->initProcessState(outputIndex)) {
- int siblingPos = mCorrection->getTreeSiblingPos(outputIndex);
+ if (correction->initProcessState(outputIndex)) {
+ int siblingPos = correction->getTreeSiblingPos(outputIndex);
int firstChildPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- mCorrection, &childCount, &firstChildPos, &siblingPos);
+ correction, &childCount, &firstChildPos, &siblingPos, queue);
// Update next sibling pos
- mCorrection->setTreeSiblingPos(outputIndex, siblingPos);
+ correction->setTreeSiblingPos(outputIndex, siblingPos);
if (needsToTraverseChildrenNodes) {
// Goes to child node
- outputIndex = mCorrection->goDownTree(outputIndex, childCount, firstChildPos);
+ outputIndex = correction->goDownTree(outputIndex, childCount, firstChildPos);
}
} else {
// Goes to parent sibling node
- outputIndex = mCorrection->getTreeParentIndex(outputIndex);
+ outputIndex = correction->getTreeParentIndex(outputIndex);
}
}
}
-void UnigramDictionary::getMissingSpaceWords(
+void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int missingSpacePos, Correction *correction,
- const bool useFullEditDistance) {
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos,
- useFullEditDistance);
- getSplitTwoWordsSuggestion(inputLength, correction);
+ WordsPriorityQueuePool* queuePool) {
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
+ correction, queuePool);
}
-void UnigramDictionary::getMistypedSpaceWords(
+void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputLength, const int spaceProximityPos, Correction *correction,
- const bool useFullEditDistance) {
- correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
- -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */,
- useFullEditDistance);
- getSplitTwoWordsSuggestion(inputLength, correction);
-}
-
-inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
- const int inputIndex, const int skipPos, const int depth) {
- const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(inputIndex);
- // Skip the ' or other letter and continue deeper
- return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
+ WordsPriorityQueuePool* queuePool) {
+ getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
+ useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
+ correction, queuePool);
}
-inline void UnigramDictionary::onTerminal(const int freq, Correction *correction) {
+inline void UnigramDictionary::onTerminal(const int freq,
+ const TerminalAttributes& terminalAttributes, Correction *correction,
+ WordsPriorityQueue *queue) {
int wordLength;
unsigned short* wordPointer;
const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
if (finalFreq >= 0) {
- addWord(wordPointer, wordLength, finalFreq);
+ if (!terminalAttributes.isShortcutOnly()) {
+ addWord(wordPointer, wordLength, finalFreq, queue);
+ }
+ TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
+ while (iterator.hasNextShortcutTarget()) {
+ // TODO: addWord only supports weak ordering, meaning we have no means to control the
+ // order of the shortcuts relative to one another or to the word. We need to either
+ // modulate the frequency of each shortcut according to its own shortcut frequency or
+ // to make the queue so that the insert order is protected inside the queue for words
+ // with the same score.
+ uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
+ MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
+ addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+ }
}
}
-void UnigramDictionary::getSplitTwoWordsSuggestion(
- const int inputLength, Correction* correction) {
- const int spaceProximityPos = correction->getSpaceProximityPos();
- const int missingSpacePos = correction->getMissingSpacePos();
+void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
+ const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
+ WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+
if (DEBUG_DICT) {
int inputCount = 0;
if (spaceProximityPos >= 0) ++inputCount;
@@ -408,9 +376,19 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
return;
const int newWordLength = firstWordLength + secondWordLength + 1;
+
+
+ // Space proximity preparation
+ //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
+ //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
+ //correction);
+ //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
+ //MAX_ERRORS_FOR_TWO_WORDS);
+
// Allocating variable length array on stack
unsigned short word[newWordLength];
- const int firstFreq = getMostFrequentWordLike(firstWordStartPos, firstWordLength, mWord);
+ const int firstFreq = getMostFrequentWordLike(
+ firstWordStartPos, firstWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
LOGI("First freq: %d", firstFreq);
}
@@ -420,7 +398,8 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i];
}
- const int secondFreq = getMostFrequentWordLike(secondWordStartPos, secondWordLength, mWord);
+ const int secondFreq = getMostFrequentWordLike(
+ secondWordStartPos, secondWordLength, proximityInfo, mWord);
if (DEBUG_DICT) {
LOGI("Second freq: %d", secondFreq);
}
@@ -431,22 +410,29 @@ void UnigramDictionary::getSplitTwoWordsSuggestion(
word[i] = mWord[i - firstWordLength - 1];
}
- const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+ // TODO: Remove initSuggestions and correction->setCorrectionParams
+ initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
+ 0 /* do not clear queue */, correction);
+
+ correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
+ -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
+ useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
+ const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
if (DEBUG_DICT) {
LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
}
- addWord(word, newWordLength, pairFreq);
+ addWord(word, newWordLength, pairFreq, masterQueue);
return;
}
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
- const int inputLength, unsigned short *word) {
+ const int inputLength, ProximityInfo *proximityInfo, unsigned short *word) {
uint16_t inWord[inputLength];
for (int i = 0; i < inputLength; ++i) {
- inWord[i] = (uint16_t)mProximityInfo->getPrimaryCharAt(startInputIndex + i);
+ inWord[i] = (uint16_t)proximityInfo->getPrimaryCharAt(startInputIndex + i);
}
return getMostFrequentWordLikeInner(inWord, inputLength, word);
}
@@ -470,8 +456,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
- int32_t baseChar = Dictionary::toBaseLowerCase(character);
- const uint16_t wChar = Dictionary::toBaseLowerCase(inWord[startInputIndex]);
+ int32_t baseChar = toBaseLowerCase(character);
+ const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -483,8 +469,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
if (hasMultipleChars) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) {
- baseChar = Dictionary::toBaseLowerCase(character);
- if (Dictionary::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
+ baseChar = toBaseLowerCase(character);
+ if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex;
return false;
@@ -597,7 +583,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
Correction *correction, int *newCount,
- int *newChildrenPosition, int *nextSiblingPosition) {
+ int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
if (DEBUG_DICT) {
correction->checkState();
}
@@ -648,7 +634,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
|| stateType == Correction::ON_TERMINAL) {
needsToInvokeOnTerminal = true;
- } else if (stateType == Correction::UNRELATED) {
+ } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
// We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining
@@ -676,7 +662,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
- onTerminal(freq, mCorrection);
+ TerminalAttributes terminalAttributes(DICT_ROOT, flags,
+ BinaryFormat::skipFrequency(flags, pos));
+ onTerminal(freq, terminalAttributes, correction, queue);
}
// If there are more chars in this node, then this virtual node has children.
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index ef9709a89..23581425a 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -22,17 +22,14 @@
#include "correction_state.h"
#include "defines.h"
#include "proximity_info.h"
-
-#ifndef NULL
-#define NULL 0
-#endif
+#include "words_priority_queue.h"
+#include "words_priority_queue_pool.h"
namespace latinime {
+class TerminalAttributes;
class UnigramDictionary {
-
-public:
-
+ public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
@@ -46,8 +43,14 @@ public:
// Flag for terminal groups
static const int FLAG_IS_TERMINAL = 0x10;
+ // Flag for shortcut targets presence
+ static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
// Flag for bigram presence
static const int FLAG_HAS_BIGRAMS = 0x04;
+ // Flag for shortcut-only words. Some words are shortcut-only, which means they match when
+ // the user types them but they don't pop in the suggestion strip, only the words they are
+ // shortcuts for do.
+ static const int FLAG_IS_SHORTCUT_ONLY = 0x02;
// Attribute (bigram/shortcut) related flags:
// Flag for presence of more attributes
@@ -64,47 +67,63 @@ public:
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+ // Error tolerances
+ static const int DEFAULT_MAX_ERRORS = 2;
+ static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
+
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
const bool isLatestDictVersion);
bool isValidWord(const uint16_t* const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
- int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
+ Correction *correction, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
unsigned short *outWords, int *frequencies);
virtual ~UnigramDictionary();
-private:
-
+ private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies, const int flags);
- bool isDigraph(const int* codes, const int i, const int codesSize) const;
+ const int *ycoordinates, const int *codes, const int inputLength,
+ const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
+ bool isDigraph(const int *codes, const int i, const int codesSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
- const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
- const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
+ const int codesBufferSize, const int flags, const int* codesSrc,
+ const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
- unsigned short *outWords, int *frequencies);
- void getSuggestionCandidates(const bool useFullEditDistance);
- bool addWord(unsigned short *word, int length, int frequency);
- void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
- void getMissingSpaceWords(const int inputLength, const int missingSpacePos,
- Correction *correction, const bool useFullEditDistance);
- void getMistypedSpaceWords(const int inputLength, const int spaceProximityPos,
- Correction *correction, const bool useFullEditDistance);
- void onTerminal(const int freq, Correction *correction);
+ WordsPriorityQueue *queue, Correction *correction);
+ void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getSuggestionCandidates(
+ const bool useFullEditDistance, const int inputLength, Correction *correction,
+ WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
+ void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
+ const int *xcoordinates, const int *ycoordinates, const int *codes,
+ const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
+ const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool);
+ void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, const int missingSpacePos, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
+ void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputLength, const int spaceProximityPos, Correction *correction,
+ WordsPriorityQueuePool* queuePool);
+ void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
+ Correction *correction, WordsPriorityQueue *queue);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character
- bool processCurrentNode(const int initialPos,
- Correction *correction, int *newCount,
- int *newChildPosition, int *nextSiblingPosition);
+ bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
+ int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
- unsigned short *word);
+ ProximityInfo *proximityInfo, unsigned short *word);
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
- short unsigned int* outWord);
+ short unsigned int *outWord);
const uint8_t* const DICT_ROOT;
const int MAX_WORD_LENGTH;
@@ -127,14 +146,8 @@ private:
};
static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[];
- int *mFrequencies;
- unsigned short *mOutputChars;
- ProximityInfo *mProximityInfo;
- Correction *mCorrection;
- int mInputLength;
- // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
-
+ // Still bundled members
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];// TODO: remove
diff --git a/native/src/words_priority_queue.h b/native/src/words_priority_queue.h
new file mode 100644
index 000000000..84f2523c2
--- /dev/null
+++ b/native/src/words_priority_queue.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_H
+
+#include <iostream>
+#include <queue>
+#include "defines.h"
+
+namespace latinime {
+
+class WordsPriorityQueue {
+ public:
+ class SuggestedWord {
+ public:
+ int mScore;
+ unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mWordLength;
+ bool mUsed;
+
+ void setParams(int score, unsigned short* word, int wordLength) {
+ mScore = score;
+ mWordLength = wordLength;
+ memcpy(mWord, word, sizeof(unsigned short) * wordLength);
+ mUsed = true;
+ }
+ };
+
+ WordsPriorityQueue(int maxWords, int maxWordLength) :
+ MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH(
+ (unsigned int) maxWordLength) {
+ mSuggestedWords = new SuggestedWord[maxWordLength];
+ for (int i = 0; i < maxWordLength; ++i) {
+ mSuggestedWords[i].mUsed = false;
+ }
+ }
+ ~WordsPriorityQueue() {
+ delete[] mSuggestedWords;
+ }
+
+ void push(int score, unsigned short* word, int wordLength) {
+ SuggestedWord* sw = 0;
+ if (mSuggestions.size() >= MAX_WORDS) {
+ sw = mSuggestions.top();
+ const int minScore = sw->mScore;
+ if (minScore >= score) {
+ return;
+ } else {
+ sw->mUsed = false;
+ mSuggestions.pop();
+ }
+ }
+ if (sw == 0) {
+ sw = getFreeSuggestedWord(score, word, wordLength);
+ } else {
+ sw->setParams(score, word, wordLength);
+ }
+ if (sw == 0) {
+ LOGE("SuggestedWord is accidentally null.");
+ return;
+ }
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ LOGI("Push word. %d, %d", score, wordLength);
+ DUMP_WORD(word, wordLength);
+ }
+ mSuggestions.push(sw);
+ }
+
+ SuggestedWord* topAndPop() {
+ if (mSuggestions.empty()) return 0;
+ SuggestedWord* sw = mSuggestions.top();
+ mSuggestions.pop();
+ return sw;
+ }
+
+ int outputSuggestions(int *frequencies, unsigned short *outputChars) {
+ const unsigned int size = min(MAX_WORDS, mSuggestions.size());
+ int index = size - 1;
+ while (!mSuggestions.empty() && index >= 0) {
+ SuggestedWord* sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ LOGI("dump word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ const unsigned int wordLength = sw->mWordLength;
+ char* targetAdr = (char*) outputChars
+ + (index) * MAX_WORD_LENGTH * sizeof(short);
+ frequencies[index] = sw->mScore;
+ memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short));
+ if (wordLength < MAX_WORD_LENGTH) {
+ ((unsigned short*) targetAdr)[wordLength] = 0;
+ }
+ sw->mUsed = false;
+ mSuggestions.pop();
+ --index;
+ }
+ return size;
+ }
+
+ int size() {
+ return mSuggestions.size();
+ }
+
+ void clear() {
+ while (!mSuggestions.empty()) {
+ SuggestedWord* sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ LOGI("Clear word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ sw->mUsed = false;
+ mSuggestions.pop();
+ }
+ }
+
+ private:
+ struct wordComparator {
+ bool operator ()(SuggestedWord * left, SuggestedWord * right) {
+ return left->mScore > right->mScore;
+ }
+ };
+
+ SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
+ int wordLength) {
+ for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
+ if (!mSuggestedWords[i].mUsed) {
+ mSuggestedWords[i].setParams(score, word, wordLength);
+ return &mSuggestedWords[i];
+ }
+ }
+ return 0;
+ }
+
+ typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
+ wordComparator> Suggestions;
+ Suggestions mSuggestions;
+ const unsigned int MAX_WORDS;
+ const unsigned int MAX_WORD_LENGTH;
+ SuggestedWord* mSuggestedWords;
+};
+}
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_H
diff --git a/native/src/words_priority_queue_pool.h b/native/src/words_priority_queue_pool.h
new file mode 100644
index 000000000..386297650
--- /dev/null
+++ b/native/src/words_priority_queue_pool.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
+
+#include "words_priority_queue.h"
+
+namespace latinime {
+
+class WordsPriorityQueuePool {
+ public:
+ WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
+ mMasterQueue = new WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
+ mSubQueue1 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ mSubQueue2 = new WordsPriorityQueue(subQueueMaxWords, maxWordLength);
+ }
+
+ ~WordsPriorityQueuePool() {
+ delete mMasterQueue;
+ }
+
+ WordsPriorityQueue* getMasterQueue() {
+ return mMasterQueue;
+ }
+ // TODO: Come up with more generic pool
+ WordsPriorityQueue* getSubQueue1() {
+ return mSubQueue1;
+ }
+ WordsPriorityQueue* getSubQueue2() {
+ return mSubQueue2;
+ }
+
+ private:
+ WordsPriorityQueue *mMasterQueue;
+ WordsPriorityQueue *mSubQueue1;
+ WordsPriorityQueue *mSubQueue2;
+};
+}
+
+#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H