aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--dictionaries/ru_wordlist.combined.gzbin1384737 -> 1384763 bytes
-rw-r--r--java/res/raw/main_ru.dictbin2229298 -> 2229294 bytes
-rw-r--r--java/res/values-ka/strings-appname.xml27
-rw-r--r--java/src/com/android/inputmethod/latin/LatinIME.java4
-rw-r--r--java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java19
-rw-r--r--java/src/com/android/inputmethod/research/ResearchLogger.java66
-rw-r--r--native/jni/Android.mk6
-rw-r--r--native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp18
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp119
-rw-r--r--native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp41
-rw-r--r--native/jni/jni_common.cpp4
-rw-r--r--native/jni/jni_common.h4
-rw-r--r--native/jni/src/defines.h64
-rw-r--r--native/jni/src/dic_traverse_wrapper.cpp26
-rw-r--r--native/jni/src/dic_traverse_wrapper.h68
-rw-r--r--native/jni/src/obsolete/correction.cpp (renamed from native/jni/src/correction.cpp)31
-rw-r--r--native/jni/src/obsolete/correction.h (renamed from native/jni/src/correction.h)23
-rw-r--r--native/jni/src/obsolete/correction_state.h (renamed from native/jni/src/correction_state.h)0
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.h24
-rw-r--r--native/jni/src/suggest/core/dictionary/binary_format.h3
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp65
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h10
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp38
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h19
-rw-r--r--native/jni/src/unigram_dictionary.cpp998
-rw-r--r--native/jni/src/unigram_dictionary.h119
-rw-r--r--native/jni/src/words_priority_queue.cpp76
-rw-r--r--native/jni/src/words_priority_queue.h175
-rw-r--r--native/jni/src/words_priority_queue_pool.h96
29 files changed, 303 insertions, 1840 deletions
diff --git a/dictionaries/ru_wordlist.combined.gz b/dictionaries/ru_wordlist.combined.gz
index 1cfab4e8c..8b67e7c83 100644
--- a/dictionaries/ru_wordlist.combined.gz
+++ b/dictionaries/ru_wordlist.combined.gz
Binary files differ
diff --git a/java/res/raw/main_ru.dict b/java/res/raw/main_ru.dict
index 707441616..3e23617c2 100644
--- a/java/res/raw/main_ru.dict
+++ b/java/res/raw/main_ru.dict
Binary files differ
diff --git a/java/res/values-ka/strings-appname.xml b/java/res/values-ka/strings-appname.xml
new file mode 100644
index 000000000..703c66aac
--- /dev/null
+++ b/java/res/values-ka/strings-appname.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/*
+**
+** Copyright 2013, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+ -->
+
+<resources xmlns:android="http://schemas.android.com/apk/res/android"
+ xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
+ <string name="english_ime_name" msgid="5940510615957428904">"Android-ის კლავიატურა (AOSP)"</string>
+ <string name="spell_checker_service_name" msgid="1254221805440242662">"Android-ის მართლწერის შემმოწმებელი (AOSP)"</string>
+ <string name="english_ime_settings" msgid="5760361067176802794">"Android-ის კლავიატურის პარამეტრები (AOSP)"</string>
+ <string name="android_spell_checker_settings" msgid="6123949487832861885">"Android-ის მართლწერის შემმოწმებლის პარამეტრები (AOSP)"</string>
+</resources>
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index 6ac54748c..0bf167fd4 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -1642,8 +1642,10 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
public void onStartBatchInput(final LatinIME latinIme) {
synchronized (mLock) {
mHandler.removeMessages(MSG_UPDATE_GESTURE_PREVIEW_AND_SUGGESTION_STRIP);
- mLatinIme = latinIme;
mInBatchInput = true;
+ mLatinIme = latinIme;
+ mLatinIme.mHandler.showGesturePreviewAndSuggestionStrip(
+ SuggestedWords.EMPTY, false /* dismissGestureFloatingPreviewText */);
}
}
diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java
index e4c5a06a2..f434a1211 100644
--- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java
+++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java
@@ -302,6 +302,23 @@ final class SuggestionStripLayoutHelper {
final int countInStrip = mSuggestionsCountInStrip;
setupWordViewsTextAndColor(suggestedWords, countInStrip);
+ final TextView centerWordView = mWordViews.get(mCenterPositionInStrip);
+ final int stripWidth = placerView.getWidth();
+ final int centerWidth = getSuggestionWidth(mCenterPositionInStrip, stripWidth);
+ if (getTextScaleX(centerWordView.getText(), centerWidth, centerWordView.getPaint())
+ < MIN_TEXT_XSCALE) {
+ // Layout only the most relevant suggested word at the center of the suggestion strip
+ // by consolidating all slots in the strip.
+ mMoreSuggestionsAvailable = (suggestedWords.size() > 1);
+ layoutWord(mCenterPositionInStrip, stripWidth);
+ stripView.addView(centerWordView);
+ setLayoutWeight(centerWordView, 1.0f, ViewGroup.LayoutParams.MATCH_PARENT);
+ if (SuggestionStripView.DBG) {
+ layoutDebugInfo(mCenterPositionInStrip, placerView, stripWidth);
+ }
+ return;
+ }
+
mMoreSuggestionsAvailable = (suggestedWords.size() > countInStrip);
int x = 0;
for (int positionInStrip = 0; positionInStrip < countInStrip; positionInStrip++) {
@@ -312,7 +329,7 @@ final class SuggestionStripLayoutHelper {
x += divider.getMeasuredWidth();
}
- final int width = getSuggestionWidth(positionInStrip, placerView.getWidth());
+ final int width = getSuggestionWidth(positionInStrip, stripWidth);
final TextView wordView = layoutWord(positionInStrip, width);
stripView.addView(wordView);
setLayoutWeight(wordView, getSuggestionWeight(positionInStrip),
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index ec54616b7..aa4a866b8 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -83,6 +83,8 @@ import java.util.List;
import java.util.Random;
import java.util.regex.Pattern;
+// TODO: Add a unit test for every "logging" method (i.e. that is called from the IME and calls
+// enqueueEvent to record a LogStatement).
/**
* Logs the use of the LatinIME keyboard.
*
@@ -852,23 +854,22 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
// The user has deleted this word and returned to the previous. Check that the word in the
// logUnit matches the expected word. If so, restore the last log unit committed to be the
// current logUnit. I.e., pull out the last LogUnit from all the LogBuffers, and make
- // restore it to mCurrentLogUnit so the new edits are captured with the word. Optionally
- // dump the contents of mCurrentLogUnit (useful if they contain deletions of the next word
- // that should not be reported to protect user privacy)
+ // it the mCurrentLogUnit so the new edits are captured with the word. Optionally dump the
+ // contents of mCurrentLogUnit (useful if they contain deletions of the next word that
+ // should not be reported to protect user privacy)
//
// Note that we don't use mLastLogUnit here, because it only goes one word back and is only
// needed for reverts, which only happen one back.
final LogUnit oldLogUnit = mMainLogBuffer.peekLastLogUnit();
- // Check that expected word matches.
+ // Check that expected word matches. It's ok if both strings are null, because this is the
+ // case where the LogUnit is storing a non-word, e.g. a separator.
if (oldLogUnit != null) {
- final String oldLogUnitWords = oldLogUnit.getWordsAsString();
// Because the word is stored in the LogUnit with digits scrubbed, the comparison must
// be made on a scrubbed version of the expectedWord as well.
- if (oldLogUnitWords != null && !oldLogUnitWords.equals(
- scrubDigitsFromString(expectedWord))) {
- return;
- }
+ final String scrubbedExpectedWord = scrubDigitsFromString(expectedWord);
+ final String oldLogUnitWords = oldLogUnit.getWordsAsString();
+ if (!TextUtils.equals(scrubbedExpectedWord, oldLogUnitWords)) return;
}
// Uncommit, merging if necessary.
@@ -984,7 +985,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
return Character.isDigit(codePoint) ? DIGIT_REPLACEMENT_CODEPOINT : codePoint;
}
- /* package for test */ static String scrubDigitsFromString(String s) {
+ /* package for test */ static String scrubDigitsFromString(final String s) {
+ if (s == null) return null;
StringBuilder sb = null;
final int length = s.length();
for (int i = 0; i < length; i = s.offsetByCodePoints(i, 1)) {
@@ -1463,21 +1465,39 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
public static void latinIME_revertCommit(final String committedWord,
final String originallyTypedWord, final boolean isBatchMode,
final String separatorString) {
+ // TODO: Prioritize adding a unit test for this method (as it is especially complex)
+ // TODO: Update the UserRecording LogBuffer as well as the MainLogBuffer
final ResearchLogger researchLogger = getInstance();
- // TODO: Verify that mCurrentLogUnit has been restored and contains the reverted word.
- final LogUnit logUnit;
- logUnit = researchLogger.mMainLogBuffer.peekLastLogUnit();
- if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
- if (logUnit != null) {
- logUnit.setWords(originallyTypedWord);
- }
- }
- researchLogger.enqueueEvent(logUnit != null ? logUnit : researchLogger.mCurrentLogUnit,
- LOGSTATEMENT_LATINIME_REVERTCOMMIT, committedWord, originallyTypedWord,
- separatorString);
- if (logUnit != null) {
- logUnit.setContainsUserDeletions();
+ //
+ // 1. Remove separator LogUnit
+ final LogUnit lastLogUnit = researchLogger.mMainLogBuffer.peekLastLogUnit();
+ // Check that we're not at the beginning of input
+ if (lastLogUnit == null) return;
+ // Check that we're after a separator
+ if (lastLogUnit.getWordsAsString() != null) return;
+ // Remove separator
+ final LogUnit separatorLogUnit = researchLogger.mMainLogBuffer.unshiftIn();
+
+ // 2. Add revert LogStatement
+ final LogUnit revertedLogUnit = researchLogger.mMainLogBuffer.peekLastLogUnit();
+ if (revertedLogUnit == null) return;
+ if (!revertedLogUnit.getWordsAsString().equals(scrubDigitsFromString(committedWord))) {
+ // Any word associated with the reverted LogUnit has already had its digits scrubbed, so
+ // any digits in the committedWord argument must also be scrubbed for an accurate
+ // comparison.
+ return;
}
+ researchLogger.enqueueEvent(revertedLogUnit, LOGSTATEMENT_LATINIME_REVERTCOMMIT,
+ committedWord, originallyTypedWord, separatorString);
+
+ // 3. Update the word associated with the LogUnit
+ revertedLogUnit.setWords(originallyTypedWord);
+ revertedLogUnit.setContainsUserDeletions();
+
+ // 4. Re-add the separator LogUnit
+ researchLogger.mMainLogBuffer.shiftIn(separatorLogUnit);
+
+ // 5. Record stats
researchLogger.mStatistics.recordRevertCommit(SystemClock.uptimeMillis());
}
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 1518dad17..7ca405752 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -46,11 +46,9 @@ LATIN_IME_JNI_SRC_FILES := \
jni_common.cpp
LATIN_IME_CORE_SRC_FILES := \
- correction.cpp \
- dic_traverse_wrapper.cpp \
- unigram_dictionary.cpp \
- words_priority_queue.cpp \
suggest/core/suggest.cpp \
+ $(addprefix obsolete/, \
+ correction.cpp) \
$(addprefix suggest/core/dicnode/, \
dic_node.cpp \
dic_node_utils.cpp \
diff --git a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
index e312aeabc..f88d37ec9 100644
--- a/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
+++ b/native/jni/com_android_inputmethod_keyboard_ProximityInfo.cpp
@@ -43,13 +43,17 @@ static void latinime_Keyboard_release(JNIEnv *env, jclass clazz, jlong proximity
delete pi;
}
-static JNINativeMethod sMethods[] = {
- {const_cast<char *>("setProximityInfoNative"),
- const_cast<char *>("(Ljava/lang/String;IIIIII[II[I[I[I[I[I[F[F[F)J"),
- reinterpret_cast<void *>(latinime_Keyboard_setProximityInfo)},
- {const_cast<char *>("releaseProximityInfoNative"),
- const_cast<char *>("(J)V"),
- reinterpret_cast<void *>(latinime_Keyboard_release)}
+static const JNINativeMethod sMethods[] = {
+ {
+ const_cast<char *>("setProximityInfoNative"),
+ const_cast<char *>("(Ljava/lang/String;IIIIII[II[I[I[I[I[I[F[F[F)J"),
+ reinterpret_cast<void *>(latinime_Keyboard_setProximityInfo)
+ },
+ {
+ const_cast<char *>("releaseProximityInfoNative"),
+ const_cast<char *>("(J)V"),
+ reinterpret_cast<void *>(latinime_Keyboard_release)
+ }
};
int register_ProximityInfo(JNIEnv *env) {
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index e94120587..f60793733 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -14,26 +14,19 @@
* limitations under the License.
*/
-#include <cstring> // for memset()
-
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
-#include "defines.h" // for macros below
+#include "com_android_inputmethod_latin_BinaryDictionary.h"
-#ifdef USE_MMAP_FOR_DICTIONARY
#include <cerrno>
+#include <cstring> // for memset()
#include <fcntl.h>
#include <sys/mman.h>
-#else // USE_MMAP_FOR_DICTIONARY
-#include <cstdlib>
-#include <cstdio> // for fopen() etc.
-#endif // USE_MMAP_FOR_DICTIONARY
-
-#include "com_android_inputmethod_latin_BinaryDictionary.h"
-#include "correction.h"
+#include "defines.h"
#include "jni.h"
#include "jni_common.h"
+#include "obsolete/correction.h"
#include "suggest/core/dictionary/binary_dictionary_format.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/dictionary.h"
@@ -60,8 +53,6 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
int fd = 0;
void *dictBuf = 0;
int adjust = 0;
-#ifdef USE_MMAP_FOR_DICTIONARY
- /* mmap version */
fd = open(sourceDirChars, O_RDONLY);
if (fd < 0) {
AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
@@ -77,35 +68,6 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
return 0;
}
dictBuf = static_cast<char *>(dictBuf) + adjust;
-#else // USE_MMAP_FOR_DICTIONARY
- /* malloc version */
- FILE *file = 0;
- file = fopen(sourceDirChars, "rb");
- if (file == 0) {
- AKLOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno);
- return 0;
- }
- dictBuf = malloc(dictSize);
- if (!dictBuf) {
- AKLOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno);
- return 0;
- }
- int ret = fseek(file, static_cast<long>(dictOffset), SEEK_SET);
- if (ret != 0) {
- AKLOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno);
- return 0;
- }
- ret = fread(dictBuf, dictSize, 1, file);
- if (ret != 1) {
- AKLOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno);
- return 0;
- }
- ret = fclose(file);
- if (ret != 0) {
- AKLOGE("DICT: Failure in fclose. ret=%d errno=%d", ret, errno);
- return 0;
- }
-#endif // USE_MMAP_FOR_DICTIONARY
if (!dictBuf) {
AKLOGE("DICT: dictBuf is null");
return 0;
@@ -115,11 +77,7 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
== BinaryDictionaryFormat::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
static_cast<int>(dictSize))) {
AKLOGE("DICT: dictionary format is unknown, bad magic number");
-#ifdef USE_MMAP_FOR_DICTIONARY
releaseDictBuf(static_cast<const char *>(dictBuf) - adjust, adjDictSize, fd);
-#else // USE_MMAP_FOR_DICTIONARY
- releaseDictBuf(dictBuf, 0, 0);
-#endif // USE_MMAP_FOR_DICTIONARY
} else {
dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust);
}
@@ -137,7 +95,8 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
- void *traverseSession = reinterpret_cast<void *>(dicTraverseSession);
+ DicTraverseSession *traverseSession =
+ reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
// Input values
int xCoordinates[inputSize];
@@ -263,17 +222,12 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic
if (!dictionary) return;
const void *dictBuf = dictionary->getBinaryDictionaryInfo()->getDictBuf();
if (!dictBuf) return;
-#ifdef USE_MMAP_FOR_DICTIONARY
releaseDictBuf(static_cast<const char *>(dictBuf) - dictionary->getDictBufAdjust(),
dictionary->getDictSize() + dictionary->getDictBufAdjust(), dictionary->getMmapFd());
-#else // USE_MMAP_FOR_DICTIONARY
- releaseDictBuf(dictBuf, 0, 0);
-#endif // USE_MMAP_FOR_DICTIONARY
delete dictionary;
}
static void releaseDictBuf(const void *dictBuf, const size_t length, const int fd) {
-#ifdef USE_MMAP_FOR_DICTIONARY
int ret = munmap(const_cast<void *>(dictBuf), length);
if (ret != 0) {
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
@@ -282,33 +236,44 @@ static void releaseDictBuf(const void *dictBuf, const size_t length, const int f
if (ret != 0) {
AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
}
-#else // USE_MMAP_FOR_DICTIONARY
- free(const_cast<void *>(dictBuf));
-#endif // USE_MMAP_FOR_DICTIONARY
}
-static JNINativeMethod sMethods[] = {
- {const_cast<char *>("openNative"),
- const_cast<char *>("(Ljava/lang/String;JJ)J"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
- {const_cast<char *>("closeNative"),
- const_cast<char *>("(J)V"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
- {const_cast<char *>("getSuggestionsNative"),
- const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
- {const_cast<char *>("getProbabilityNative"),
- const_cast<char *>("(J[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)},
- {const_cast<char *>("isValidBigramNative"),
- const_cast<char *>("(J[I[I)Z"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
- {const_cast<char *>("calcNormalizedScoreNative"),
- const_cast<char *>("([I[II)F"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
- {const_cast<char *>("editDistanceNative"),
- const_cast<char *>("([I[I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
+static const JNINativeMethod sMethods[] = {
+ {
+ const_cast<char *>("openNative"),
+ const_cast<char *>("(Ljava/lang/String;JJ)J"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_open)
+ },
+ {
+ const_cast<char *>("closeNative"),
+ const_cast<char *>("(J)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_close)
+ },
+ {
+ const_cast<char *>("getSuggestionsNative"),
+ const_cast<char *>("(JJJ[I[I[I[I[III[I[I[I[I[I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
+ },
+ {
+ const_cast<char *>("getProbabilityNative"),
+ const_cast<char *>("(J[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
+ },
+ {
+ const_cast<char *>("isValidBigramNative"),
+ const_cast<char *>("(J[I[I)Z"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
+ },
+ {
+ const_cast<char *>("calcNormalizedScoreNative"),
+ const_cast<char *>("([I[II)F"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
+ },
+ {
+ const_cast<char *>("editDistanceNative"),
+ const_cast<char *>("([I[I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)
+ }
};
int register_BinaryDictionary(JNIEnv *env) {
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
index 08124ffc8..72e625836 100644
--- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
+++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
@@ -17,48 +17,55 @@
#define LOG_TAG "LatinIME: jni: Session"
#include "com_android_inputmethod_latin_DicTraverseSession.h"
+
#include "defines.h"
-#include "dic_traverse_wrapper.h"
#include "jni.h"
#include "jni_common.h"
+#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
class Dictionary;
static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring localeJStr) {
- void *traverseSession = DicTraverseWrapper::getDicTraverseSession(env, localeJStr);
+ void *traverseSession = DicTraverseSession::getSessionInstance(env, localeJStr);
return reinterpret_cast<jlong>(traverseSession);
}
static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession,
jlong dictionary, jintArray previousWord, jint previousWordLength) {
- void *ts = reinterpret_cast<void *>(traverseSession);
+ DicTraverseSession *ts = reinterpret_cast<DicTraverseSession *>(traverseSession);
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
if (!previousWord) {
- DicTraverseWrapper::initDicTraverseSession(
+ DicTraverseSession::initSessionInstance(
ts, dict, 0 /* prevWord */, 0 /* prevWordLength*/, 0 /* suggestOptions */);
return;
}
int prevWord[previousWordLength];
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
- DicTraverseWrapper::initDicTraverseSession(
+ DicTraverseSession::initSessionInstance(
ts, dict, prevWord, previousWordLength, 0 /* suggestOptions */);
}
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
- void *ts = reinterpret_cast<void *>(traverseSession);
- DicTraverseWrapper::releaseDicTraverseSession(ts);
+ DicTraverseSession *ts = reinterpret_cast<DicTraverseSession *>(traverseSession);
+ DicTraverseSession::releaseSessionInstance(ts);
}
-static JNINativeMethod sMethods[] = {
- {const_cast<char *>("setDicTraverseSessionNative"),
- const_cast<char *>("(Ljava/lang/String;)J"),
- reinterpret_cast<void *>(latinime_setDicTraverseSession)},
- {const_cast<char *>("initDicTraverseSessionNative"),
- const_cast<char *>("(JJ[II)V"),
- reinterpret_cast<void *>(latinime_initDicTraverseSession)},
- {const_cast<char *>("releaseDicTraverseSessionNative"),
- const_cast<char *>("(J)V"),
- reinterpret_cast<void *>(latinime_releaseDicTraverseSession)}
+static const JNINativeMethod sMethods[] = {
+ {
+ const_cast<char *>("setDicTraverseSessionNative"),
+ const_cast<char *>("(Ljava/lang/String;)J"),
+ reinterpret_cast<void *>(latinime_setDicTraverseSession)
+ },
+ {
+ const_cast<char *>("initDicTraverseSessionNative"),
+ const_cast<char *>("(JJ[II)V"),
+ reinterpret_cast<void *>(latinime_initDicTraverseSession)
+ },
+ {
+ const_cast<char *>("releaseDicTraverseSessionNative"),
+ const_cast<char *>("(J)V"),
+ reinterpret_cast<void *>(latinime_releaseDicTraverseSession)
+ }
};
int register_DicTraverseSession(JNIEnv *env) {
diff --git a/native/jni/jni_common.cpp b/native/jni/jni_common.cpp
index 8e5c50880..f2867d7c3 100644
--- a/native/jni/jni_common.cpp
+++ b/native/jni/jni_common.cpp
@@ -55,8 +55,8 @@ jint JNI_OnLoad(JavaVM *vm, void *reserved) {
}
namespace latinime {
-int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *methods,
- int numMethods) {
+int registerNativeMethods(JNIEnv *env, const char *const className, const JNINativeMethod *methods,
+ const int numMethods) {
jclass clazz = env->FindClass(className);
if (!clazz) {
AKLOGE("Native registration unable to find class '%s'", className);
diff --git a/native/jni/jni_common.h b/native/jni/jni_common.h
index f960b05a6..ef72a7ce9 100644
--- a/native/jni/jni_common.h
+++ b/native/jni/jni_common.h
@@ -20,7 +20,7 @@
#include "jni.h"
namespace latinime {
-int registerNativeMethods(JNIEnv *env, const char *className, JNINativeMethod *methods,
- int numMethods);
+int registerNativeMethods(JNIEnv *env, const char *const className, const JNINativeMethod *methods,
+ const int numMethods);
} // namespace latinime
#endif // LATINIME_JNI_COMMON_H
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index eb59744f6..e0edff584 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -203,14 +203,12 @@ static inline void prof_out(void) {
#define DEBUG_DICT true
#define DEBUG_DICT_FULL false
#define DEBUG_EDIT_DISTANCE false
-#define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE DEBUG_DICT_FULL
#define DEBUG_TRACE DEBUG_DICT_FULL
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_PROXIMITY_CHARS false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
-#define DEBUG_WORDS_PRIORITY_QUEUE false
#define DEBUG_SAMPLING_POINTS false
#define DEBUG_POINTS_PROBABILITY false
#define DEBUG_DOUBLE_LETTER false
@@ -229,14 +227,12 @@ static inline void prof_out(void) {
#define DEBUG_DICT false
#define DEBUG_DICT_FULL false
#define DEBUG_EDIT_DISTANCE false
-#define DEBUG_SHOW_FOUND_WORD false
#define DEBUG_NODE false
#define DEBUG_TRACE false
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_PROXIMITY_CHARS false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
-#define DEBUG_WORDS_PRIORITY_QUEUE false
#define DEBUG_SAMPLING_POINTS false
#define DEBUG_POINTS_PROBABILITY false
#define DEBUG_DOUBLE_LETTER false
@@ -268,11 +264,6 @@ static inline void prof_out(void) {
// of the binary dictionary where a {key,value} string pair scheme is used.
#define LARGEST_INT_DIGIT_COUNT 11
-// Define this to use mmap() for dictionary loading. Undefine to use malloc() instead of mmap().
-// We measured and compared performance of both, and found mmap() is fairly good in terms of
-// loading time, and acceptable even for several initial lookups which involve page faults.
-#define USE_MMAP_FOR_DICTIONARY
-
#define NOT_VALID_WORD (-99)
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
@@ -289,61 +280,21 @@ static inline void prof_out(void) {
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
#define SUGGEST_MULTIPLE_WORDS true
-#define USE_SUGGEST_INTERFACE_FOR_TYPING true
#define SUGGEST_INTERFACE_OUTPUT_SCALE 1000000.0f
-// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
-#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
-#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12
-#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58
-#define WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE 50
-#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
-#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
-#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
-#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
-#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
-#define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 70
-#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
-#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 148
-#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER 3
-#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45
-#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
-#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
-#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
-#define TWO_WORDS_CORRECTION_DEMOTION_BASE 80
-#define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1
#define ZERO_DISTANCE_PROMOTION_RATE 110.0f
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
#define MAX_PROBABILITY 255
#define MAX_BIGRAM_ENCODED_PROBABILITY 15
+#define MULTIPLE_WORDS_DEMOTION_RATE 80
// Assuming locale strings such as en_US, sr-Latn etc.
#define MAX_LOCALE_STRING_LENGTH 10
-// Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used
-// for better performance.
-// Holds up to 1 candidate for each word
-#define SUB_QUEUE_MAX_WORDS 1
-#define SUB_QUEUE_MAX_COUNT 10
-#define SUB_QUEUE_MIN_WORD_LENGTH 4
-// TODO: Extend this limitation
-#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 5
-// TODO: Remove this limitation
-#define MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH 12
-// TODO: Remove this limitation
-#define MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT 45
-#define MULTIPLE_WORDS_DEMOTION_RATE 80
-#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6
-
-#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f
-#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f
/* heuristic... This should be changed if we change the unit of the probability. */
#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_PROBABILITY * 58 / 100)
-#define MAX_DEPTH_MULTIPLIER 3
-#define FIRST_WORD_INDEX 0
-
// Max value for length, distance and probability which are used in weighting
// TODO: Remove
#define MAX_VALUE_FOR_WEIGHTING 10000000
@@ -351,16 +302,17 @@ static inline void prof_out(void) {
// The max number of the keys in one keyboard layout
#define MAX_KEY_COUNT_IN_A_KEYBOARD 64
-// TODO: Reduce this constant if possible; check the maximum number of digraphs in the same
-// word in the dictionary for languages with digraphs, like German and French
-#define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5
-
-#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
-
// TODO: Remove
#define MAX_POINTER_COUNT 1
#define MAX_POINTER_COUNT_G 2
+// Queue IDs and size for DicNodesCache
+#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE 0
+#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE 1
+#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL 2
+#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION 3
+#define DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE 4
+
// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of
diff --git a/native/jni/src/dic_traverse_wrapper.cpp b/native/jni/src/dic_traverse_wrapper.cpp
deleted file mode 100644
index ec8c62dcc..000000000
--- a/native/jni/src/dic_traverse_wrapper.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2012, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define LOG_TAG "LatinIME: jni: Session"
-
-#include "dic_traverse_wrapper.h"
-
-namespace latinime {
-void *(*DicTraverseWrapper::sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring) = 0;
-void (*DicTraverseWrapper::sDicTraverseSessionReleaseMethod)(void *) = 0;
-void (*DicTraverseWrapper::sDicTraverseSessionInitMethod)(
- void *, const Dictionary *const, const int *, const int, const SuggestOptions *const) = 0;
-} // namespace latinime
diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h
deleted file mode 100644
index 43b4c9ade..000000000
--- a/native/jni/src/dic_traverse_wrapper.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DIC_TRAVERSE_WRAPPER_H
-#define LATINIME_DIC_TRAVERSE_WRAPPER_H
-
-#include "defines.h"
-#include "jni.h"
-
-namespace latinime {
-class Dictionary;
-class SuggestOptions;
-// TODO: Remove
-class DicTraverseWrapper {
- public:
- static void *getDicTraverseSession(JNIEnv *env, jstring locale) {
- if (sDicTraverseSessionFactoryMethod) {
- return sDicTraverseSessionFactoryMethod(env, locale);
- }
- return 0;
- }
- static void initDicTraverseSession(void *traverseSession, const Dictionary *const dictionary,
- const int *prevWord, const int prevWordLength,
- const SuggestOptions *const suggestOptions) {
- if (sDicTraverseSessionInitMethod) {
- sDicTraverseSessionInitMethod(
- traverseSession, dictionary, prevWord, prevWordLength, suggestOptions);
- }
- }
- static void releaseDicTraverseSession(void *traverseSession) {
- if (sDicTraverseSessionReleaseMethod) {
- sDicTraverseSessionReleaseMethod(traverseSession);
- }
- }
- static void setTraverseSessionFactoryMethod(void *(*factoryMethod)(JNIEnv *, jstring)) {
- sDicTraverseSessionFactoryMethod = factoryMethod;
- }
- static void setTraverseSessionInitMethod(
- void (*initMethod)(void *, const Dictionary *const, const int *, const int,
- const SuggestOptions *const)) {
- sDicTraverseSessionInitMethod = initMethod;
- }
- static void setTraverseSessionReleaseMethod(void (*releaseMethod)(void *)) {
- sDicTraverseSessionReleaseMethod = releaseMethod;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseWrapper);
- static void *(*sDicTraverseSessionFactoryMethod)(JNIEnv *, jstring);
- static void (*sDicTraverseSessionInitMethod)(
- void *, const Dictionary *const, const int *, const int, const SuggestOptions *const);
- static void (*sDicTraverseSessionReleaseMethod)(void *);
-};
-} // namespace latinime
-#endif // LATINIME_DIC_TRAVERSE_WRAPPER_H
diff --git a/native/jni/src/correction.cpp b/native/jni/src/obsolete/correction.cpp
index feed5622b..6b80ed8ea 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/obsolete/correction.cpp
@@ -18,8 +18,8 @@
#include <cmath>
-#include "correction.h"
#include "defines.h"
+#include "obsolete/correction.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/policyimpl/utils/edit_distance.h"
@@ -30,6 +30,27 @@ namespace latinime {
class ProximityInfo;
+// private static const member variables
+// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
+const int Correction::WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE = 80;
+const int Correction::WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X = 12;
+const int Correction::WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE = 58;
+const int Correction::WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE = 50;
+const int Correction::WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE = 75;
+const int Correction::WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE = 75;
+const int Correction::WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE = 70;
+const int Correction::FULL_MATCHED_WORDS_PROMOTION_RATE = 120;
+const int Correction::WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE = 90;
+const int Correction::WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE = 70;
+const int Correction::WORDS_WITH_MATCH_SKIP_PROMOTION_RATE = 105;
+const int Correction::WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE = 148;
+const int Correction::WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER = 3;
+const int Correction::CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE = 45;
+const int Correction::INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE = 70;
+const int Correction::FIRST_CHAR_DIFFERENT_DEMOTION_RATE = 96;
+const int Correction::TWO_WORDS_CAPITALIZED_DEMOTION_RATE = 50;
+const int Correction::TWO_WORDS_CORRECTION_DEMOTION_BASE = 80;
+
/////////////////////////////
// edit distance funcitons //
/////////////////////////////
@@ -918,11 +939,15 @@ inline static bool isUpperCase(unsigned short c) {
// In dictionary.cpp, getSuggestion() method,
// When USE_SUGGEST_INTERFACE_FOR_TYPING is true:
+//
+// // TODO: Revise the following logic thoroughly by referring to the logic
+// // marked as "Otherwise" below.
// SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion scores to convert
// them to integers.
// score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE)
// Undo the scaling here to recover the original score.
// normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE
+//
// Otherwise: suggestion scores are computed using the below formula.
// original score
// := powf(mTypedLetterMultiplier (this is defined 2),
@@ -965,9 +990,11 @@ inline static bool isUpperCase(unsigned short c) {
// so, 0 <= distance / afterLength <= 1
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
- if (USE_SUGGEST_INTERFACE_FOR_TYPING) {
+ // TODO: Revise the following logic thoroughly by referring to...
+ if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) {
return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * weight;
}
+ // ...this logic.
const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
: static_cast<float>(MAX_INITIAL_SCORE)
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
diff --git a/native/jni/src/correction.h b/native/jni/src/obsolete/correction.h
index 84d6429ba..47dcef2d7 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/obsolete/correction.h
@@ -19,8 +19,8 @@
#include <cstring> // for memset()
-#include "correction_state.h"
#include "defines.h"
+#include "obsolete/correction_state.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "utils/char_utils.h"
@@ -135,6 +135,27 @@ class Correction {
private:
DISALLOW_COPY_AND_ASSIGN(Correction);
+ // The following "rate"s are used as a multiplier before dividing by 100, so they are in
+ // percent.
+ static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE;
+ static const int WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X;
+ static const int WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE;
+ static const int WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE;
+ static const int WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE;
+ static const int WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE;
+ static const int WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE;
+ static const int FULL_MATCHED_WORDS_PROMOTION_RATE;
+ static const int WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE;
+ static const int WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE;
+ static const int WORDS_WITH_MATCH_SKIP_PROMOTION_RATE;
+ static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE;
+ static const int WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER;
+ static const int CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE;
+ static const int INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE;
+ static const int FIRST_CHAR_DIFFERENT_DEMOTION_RATE;
+ static const int TWO_WORDS_CAPITALIZED_DEMOTION_RATE;
+ static const int TWO_WORDS_CORRECTION_DEMOTION_BASE;
+
/////////////////////////
// static inline utils //
/////////////////////////
diff --git a/native/jni/src/correction_state.h b/native/jni/src/obsolete/correction_state.h
index a63d4aa94..a63d4aa94 100644
--- a/native/jni/src/correction_state.h
+++ b/native/jni/src/obsolete/correction_state.h
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
index 7f5bdbcf6..7aab0906e 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
@@ -22,12 +22,6 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
-#define INITIAL_QUEUE_ID_ACTIVE 0
-#define INITIAL_QUEUE_ID_NEXT_ACTIVE 1
-#define INITIAL_QUEUE_ID_TERMINAL 2
-#define INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION 3
-#define PRIORITY_QUEUES_SIZE 4
-
namespace latinime {
class DicNode;
@@ -38,11 +32,12 @@ class DicNode;
class DicNodesCache {
public:
AK_FORCE_INLINE DicNodesCache()
- : mActiveDicNodes(&mDicNodePriorityQueues[INITIAL_QUEUE_ID_ACTIVE]),
- mNextActiveDicNodes(&mDicNodePriorityQueues[INITIAL_QUEUE_ID_NEXT_ACTIVE]),
- mTerminalDicNodes(&mDicNodePriorityQueues[INITIAL_QUEUE_ID_TERMINAL]),
- mCachedDicNodesForContinuousSuggestion(
- &mDicNodePriorityQueues[INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION]),
+ : mActiveDicNodes(&mDicNodePriorityQueues[DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE]),
+ mNextActiveDicNodes(&mDicNodePriorityQueues[
+ DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE]),
+ mTerminalDicNodes(&mDicNodePriorityQueues[DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL]),
+ mCachedDicNodesForContinuousSuggestion(&mDicNodePriorityQueues[
+ DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION]),
mInputIndex(0), mLastCachedInputIndex(0) {
}
@@ -147,9 +142,8 @@ class DicNodesCache {
mCachedDicNodesForContinuousSuggestion->dump();
}
mInputIndex = mLastCachedInputIndex;
- mCachedDicNodesForContinuousSuggestion =
- moveNodesAndReturnReusableEmptyQueue(
- mCachedDicNodesForContinuousSuggestion, &mActiveDicNodes);
+ mCachedDicNodesForContinuousSuggestion = moveNodesAndReturnReusableEmptyQueue(
+ mCachedDicNodesForContinuousSuggestion, &mActiveDicNodes);
}
AK_FORCE_INLINE static DicNodePriorityQueue *moveNodesAndReturnReusableEmptyQueue(
@@ -169,7 +163,7 @@ class DicNodesCache {
mTerminalDicNodes->clear();
}
- DicNodePriorityQueue mDicNodePriorityQueues[PRIORITY_QUEUES_SIZE];
+ DicNodePriorityQueue mDicNodePriorityQueues[DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE];
// Active dicNodes currently being expanded.
DicNodePriorityQueue *mActiveDicNodes;
// Next dicNodes to be expanded.
diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h
index 1b57793fa..c82065f97 100644
--- a/native/jni/src/suggest/core/dictionary/binary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_format.h
@@ -267,8 +267,7 @@ AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *
inline float BinaryFormat::getMultiWordCostMultiplier(const uint8_t *const dict,
const int dictSize) {
- const int headerValue = readHeaderValueInt(dict, dictSize,
- "MULTIPLE_WORDS_DEMOTION_RATE");
+ const int headerValue = readHeaderValueInt(dict, dictSize, "MULTIPLE_WORDS_DEMOTION_RATE");
if (headerValue == S_INT_MIN) {
return 1.0f;
}
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 6fd755dfe..2d4ad5df5 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -22,42 +22,40 @@
#include <stdint.h>
#include "defines.h"
-#include "dic_traverse_wrapper.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/binary_format.h"
+#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
-#include "unigram_dictionary.h"
namespace latinime {
Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust)
: mBinaryDicitonaryInfo(static_cast<const uint8_t *>(dict), dictSize),
- mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
- mUnigramDictionary(new UnigramDictionary(&mBinaryDicitonaryInfo,
- BinaryFormat::getFlags(mBinaryDicitonaryInfo.getDictBuf(), dictSize))),
+ mDictSize(dictSize),
+ mDictFlags(BinaryFormat::getFlags(mBinaryDicitonaryInfo.getDictBuf(), dictSize)),
+ mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
mBigramDictionary(new BigramDictionary(&mBinaryDicitonaryInfo)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
}
Dictionary::~Dictionary() {
- delete mUnigramDictionary;
delete mBigramDictionary;
delete mGestureSuggest;
delete mTypingSuggest;
}
-int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
+int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
int *spaceIndices, int *outputTypes) const {
int result = 0;
if (suggestOptions->isGesture()) {
- DicTraverseWrapper::initDicTraverseSession(
+ DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
@@ -67,26 +65,15 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
}
return result;
} else {
- if (USE_SUGGEST_INTERFACE_FOR_TYPING) {
- DicTraverseWrapper::initDicTraverseSession(
- traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
- ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
- outWords, frequencies, spaceIndices, outputTypes);
- if (DEBUG_DICT) {
- DUMP_RESULT(outWords, frequencies);
- }
- return result;
- } else {
- std::map<int, int> bigramMap;
- uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
- mBigramDictionary->fillBigramAddressToProbabilityMapAndFilter(prevWordCodePoints,
- prevWordLength, &bigramMap, bigramFilter);
- result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates,
- inputCodePoints, inputSize, &bigramMap, bigramFilter,
- suggestOptions->useFullEditDistance(), outWords, frequencies, outputTypes);
- return result;
+ DicTraverseSession::initSessionInstance(
+ traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
+ result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
+ outWords, frequencies, spaceIndices, outputTypes);
+ if (DEBUG_DICT) {
+ DUMP_RESULT(outWords, frequencies);
}
+ return result;
}
}
@@ -98,7 +85,27 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
}
int Dictionary::getProbability(const int *word, int length) const {
- return mUnigramDictionary->getProbability(word, length);
+ const uint8_t *const root = mBinaryDicitonaryInfo.getDictRoot();
+ int pos = BinaryFormat::getTerminalPosition(root, word, length,
+ false /* forceLowerCaseSearch */);
+ if (NOT_VALID_WORD == pos) {
+ return NOT_A_PROBABILITY;
+ }
+ const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+ // If this is not a word, or if it's a blacklisted entry, it should behave as
+ // having no probability outside of the suggestion process (where it should be used
+ // for shortcuts).
+ return NOT_A_PROBABILITY;
+ }
+ const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
+ if (hasMultipleChars) {
+ pos = BinaryFormat::skipOtherCharacters(root, pos);
+ } else {
+ BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ }
+ const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
+ return unigramProbability;
}
bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const {
@@ -106,7 +113,7 @@ bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2,
}
int Dictionary::getDictFlags() const {
- return mUnigramDictionary->getDictFlags();
+ return mDictFlags;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 771837bc6..1f25080b1 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -25,10 +25,10 @@
namespace latinime {
class BigramDictionary;
+class DicTraverseSession;
class ProximityInfo;
class SuggestInterface;
class SuggestOptions;
-class UnigramDictionary;
class Dictionary {
public:
@@ -54,9 +54,9 @@ class Dictionary {
Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust);
- int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
- int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int inputSize,
- int *prevWordCodePoints, int prevWordLength, int commitPoint,
+ int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
+ int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
+ int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
int *spaceIndices, int *outputTypes) const;
@@ -81,10 +81,10 @@ class Dictionary {
// Used only for the mmap version of dictionary loading, but we use these as dummy variables
// also for the malloc version.
const int mDictSize;
+ const int mDictFlags;
const int mMmapFd;
const int mDictBufAdjust;
- const UnigramDictionary *mUnigramDictionary;
const BigramDictionary *mBigramDictionary;
SuggestInterface *mGestureSuggest;
SuggestInterface *mTypingSuggest;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index be293df42..c398caefa 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -17,7 +17,6 @@
#include "suggest/core/session/dic_traverse_session.h"
#include "defines.h"
-#include "dic_traverse_wrapper.h"
#include "jni.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
@@ -26,43 +25,6 @@
namespace latinime {
-const int DicTraverseSession::CACHE_START_INPUT_LENGTH_THRESHOLD = 20;
-
-// A factory method for DicTraverseSession
-static void *getSessionInstance(JNIEnv *env, jstring localeStr) {
- return new DicTraverseSession(env, localeStr);
-}
-
-// TODO: Pass "DicTraverseSession *traverseSession" when the source code structure settles down.
-static void initSessionInstance(void *traverseSession, const Dictionary *const dictionary,
- const int *prevWord, const int prevWordLength,
- const SuggestOptions *const suggestOptions) {
- if (traverseSession) {
- DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
- tSession->init(dictionary, prevWord, prevWordLength, suggestOptions);
- }
-}
-
-// TODO: Pass "DicTraverseSession *traverseSession" when the source code structure settles down.
-static void releaseSessionInstance(void *traverseSession) {
- delete static_cast<DicTraverseSession *>(traverseSession);
-}
-
-// An ad-hoc internal class to register the factory method defined above
-class TraverseSessionFactoryRegisterer {
- public:
- TraverseSessionFactoryRegisterer() {
- DicTraverseWrapper::setTraverseSessionFactoryMethod(getSessionInstance);
- DicTraverseWrapper::setTraverseSessionInitMethod(initSessionInstance);
- DicTraverseWrapper::setTraverseSessionReleaseMethod(releaseSessionInstance);
- }
- private:
- DISALLOW_COPY_AND_ASSIGN(TraverseSessionFactoryRegisterer);
-};
-
-// To invoke the TraverseSessionFactoryRegisterer constructor in the global constructor.
-static TraverseSessionFactoryRegisterer traverseSessionFactoryRegisterer;
-
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
int prevWordLength, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 3b6a3dc8c..630b3b59b 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -35,6 +35,25 @@ class SuggestOptions;
class DicTraverseSession {
public:
+
+ // A factory method for DicTraverseSession
+ static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr) {
+ return new DicTraverseSession(env, localeStr);
+ }
+
+ // Initializes traverseSession for the given dictionary and context; a null session is a no-op.
+ static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession,
+ const Dictionary *const dictionary, const int *prevWord, const int prevWordLength,
+ const SuggestOptions *const suggestOptions) {
+ if (traverseSession) {
+ traverseSession->init(dictionary, prevWord, prevWordLength, suggestOptions);
+ }
+ }
+
+ static AK_FORCE_INLINE void releaseSessionInstance(DicTraverseSession *traverseSession) {
+ delete traverseSession;
+ }
+
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr)
: mPrevWordPos(NOT_VALID_WORD), mProximityInfo(0),
mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(),
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
deleted file mode 100644
index 5820a1d0e..000000000
--- a/native/jni/src/unigram_dictionary.cpp
+++ /dev/null
@@ -1,998 +0,0 @@
-/*
- * Copyright (C) 2010, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstring>
-
-#define LOG_TAG "LatinIME: unigram_dictionary.cpp"
-
-#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_info.h"
-#include "suggest/core/dictionary/binary_format.h"
-#include "suggest/core/dictionary/dictionary.h"
-#include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/dictionary/probability_utils.h"
-#include "suggest/core/dictionary/terminal_attributes.h"
-#include "suggest/core/layout/proximity_info.h"
-#include "utils/char_utils.h"
-#include "unigram_dictionary.h"
-#include "words_priority_queue.h"
-#include "words_priority_queue_pool.h"
-
-namespace latinime {
-
-// TODO: check the header
-UnigramDictionary::UnigramDictionary(
- const BinaryDictionaryInfo *const binaryDicitonaryInfo, const uint8_t dictFlags)
- : mBinaryDicitonaryInfo(binaryDicitonaryInfo),
- MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), DICT_FLAGS(dictFlags) {
- if (DEBUG_DICT) {
- AKLOGI("UnigramDictionary - constructor");
- }
-}
-
-UnigramDictionary::~UnigramDictionary() {
-}
-
-// TODO: This needs to take a const int* and not tinker with its contents
-static void addWord(int *word, int length, int probability, WordsPriorityQueue *queue, int type) {
- queue->push(probability, word, length, type);
-}
-
-// Return the replacement code point for a digraph, or 0 if none.
-int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize,
- const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {
-
- // There can't be a digraph if we don't have at least 2 characters to examine
- if (i + 2 > inputSize) return false;
-
- // Search for the first char of some digraph
- int lastDigraphIndex = -1;
- const int thisChar = codes[i];
- for (lastDigraphIndex = digraphsSize - 1; lastDigraphIndex >= 0; --lastDigraphIndex) {
- if (thisChar == digraphs[lastDigraphIndex].first) break;
- }
- // No match: return early
- if (lastDigraphIndex < 0) return 0;
-
- // It's an interesting digraph if the second char matches too.
- if (digraphs[lastDigraphIndex].second == codes[i + 1]) {
- return digraphs[lastDigraphIndex].compositeGlyph;
- } else {
- return 0;
- }
-}
-
-// Mostly the same arguments as the non-recursive version, except:
-// codes is the original value. It points to the start of the work buffer, and gets passed as is.
-// inputSize is the size of the user input (thus, it is the size of codesSrc).
-// codesDest is the current point in the work buffer.
-// codesSrc is the current point in the user-input, original, content-unmodified buffer.
-// codesRemain is the remaining size in codesSrc.
-void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
- int *xCoordinatesBuffer, int *yCoordinatesBuffer,
- const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, const int *codesSrc,
- const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
- WordsPriorityQueuePool *queuePool,
- const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {
- ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0]));
- ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0]));
- ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0]));
-
- const int startIndex = static_cast<int>(codesDest - codesBuffer);
- if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
- for (int i = 0; i < codesRemain; ++i) {
- xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
- yCoordinatesBuffer[startIndex + i] = ycoordinates[codesBufferSize - codesRemain + i];
- const int replacementCodePoint =
- getDigraphReplacement(codesSrc, i, codesRemain, digraphs, digraphsSize);
- if (0 != replacementCodePoint) {
- // Found a digraph. We will try both spellings. eg. the word is "pruefen"
-
- // Copy the word up to the first char of the digraph, including proximity chars,
- // and overwrite the primary code with the replacement code point. Then, continue
- // processing on the remaining part of the word, skipping the second char of the
- // digraph.
- // In our example, copy "pru", replace "u" with the version with the diaeresis and
- // continue running on "fen".
- // Make i the index of the second char of the digraph for simplicity. Forgetting
- // to do that results in an infinite recursion so take care!
- ++i;
- memcpy(codesDest, codesSrc, i * sizeof(codesDest[0]));
- codesDest[i - 1] = replacementCodePoint;
- getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
- codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
- bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1,
- codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
- queuePool, digraphs, digraphsSize);
-
- // Copy the second char of the digraph in place, then continue processing on
- // the remaining part of the word.
- // In our example, after "pru" in the buffer copy the "e", and continue on "fen"
- memcpy(codesDest + i, codesSrc + i, sizeof(codesDest[0]));
- getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
- codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
- bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i,
- currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
- digraphsSize);
- return;
- }
- }
- }
-
- // If we come here, we hit the end of the word: let's check it against the dictionary.
- // In our example, we'll come here once for "prufen" and then once for "pruefen".
- // If the word contains several digraphs, we'll come it for the product of them.
- // eg. if the word is "ueberpruefen" we'll test, in order, against
- // "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".
- const unsigned int remainingBytes = sizeof(codesDest[0]) * codesRemain;
- if (0 != remainingBytes) {
- memcpy(codesDest, codesSrc, remainingBytes);
- memcpy(&xCoordinatesBuffer[startIndex], &xcoordinates[codesBufferSize - codesRemain],
- sizeof(xCoordinatesBuffer[0]) * codesRemain);
- memcpy(&yCoordinatesBuffer[startIndex], &ycoordinates[codesBufferSize - codesRemain],
- sizeof(yCoordinatesBuffer[0]) * codesRemain);
- }
-
- getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
- startIndex + codesRemain, bigramMap, bigramFilter, useFullEditDistance, correction,
- queuePool);
-}
-
-// bigramMap contains the association <bigram address> -> <bigram probability>
-// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
-// in bigram_dictionary.cpp
-int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *inputCodePoints, const int inputSize,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
- WordsPriorityQueuePool queuePool(MAX_RESULTS, SUB_QUEUE_MAX_WORDS);
- queuePool.clearAll();
- Correction masterCorrection;
- masterCorrection.resetCorrection();
- const DigraphUtils::digraph_t *digraphs = 0;
- const int digraphsSize =
- DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(DICT_FLAGS, &digraphs);
- if (digraphsSize > 0)
- { // Incrementally tune the word and try all possibilities
- int codesBuffer[sizeof(*inputCodePoints) * inputSize];
- int xCoordinatesBuffer[inputSize];
- int yCoordinatesBuffer[inputSize];
- getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
- xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
- useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
- &queuePool, digraphs, digraphsSize);
- } else { // Normal processing
- getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize,
- bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
- }
-
- PROF_START(20);
- if (DEBUG_DICT) {
- float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
- masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0);
- ns += 0;
- AKLOGI("Max normalized score = %f", ns);
- }
- const int suggestedWordsCount =
- queuePool.getMasterQueue()->outputSuggestions(masterCorrection.getPrimaryInputWord(),
- inputSize, frequencies, outWords, outputTypes);
-
- if (DEBUG_DICT) {
- float ns = queuePool.getMasterQueue()->getHighestNormalizedScore(
- masterCorrection.getPrimaryInputWord(), inputSize, 0, 0, 0);
- ns += 0;
- AKLOGI("Returning %d words", suggestedWordsCount);
- /// Print the returned words
- for (int j = 0; j < suggestedWordsCount; ++j) {
- int *w = outWords + j * MAX_WORD_LENGTH;
- char s[MAX_WORD_LENGTH];
- for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
- (void)s; // To suppress compiler warning
- AKLOGI("%s %i", s, frequencies[j]);
- }
- }
- PROF_END(20);
- PROF_CLOSE;
- return suggestedWordsCount;
-}
-
-void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *inputCodePoints, const int inputSize,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
- const {
- PROF_OPEN;
- PROF_START(0);
- PROF_END(0);
-
- PROF_START(1);
- getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, bigramMap,
- bigramFilter, useFullEditDistance, inputSize, correction, queuePool);
- PROF_END(1);
-
- PROF_START(2);
- // Note: This line is intentionally left blank
- PROF_END(2);
-
- PROF_START(3);
- // Note: This line is intentionally left blank
- PROF_END(3);
-
- PROF_START(4);
- bool hasAutoCorrectionCandidate = false;
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- if (masterQueue->size() > 0) {
- float nsForMaster = masterQueue->getHighestNormalizedScore(
- correction->getPrimaryInputWord(), inputSize, 0, 0, 0);
- hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD);
- }
- PROF_END(4);
-
- PROF_START(5);
- // Multiple word suggestions
- if (SUGGEST_MULTIPLE_WORDS
- && inputSize >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
- getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints,
- useFullEditDistance, inputSize, correction, queuePool,
- hasAutoCorrectionCandidate);
- }
- PROF_END(5);
-
- PROF_START(6);
- // Note: This line is intentionally left blank
- PROF_END(6);
-
- if (DEBUG_DICT) {
- queuePool->dumpSubQueue1TopSuggestions();
- for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- WordsPriorityQueue *queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i);
- if (queue->size() > 0) {
- WordsPriorityQueue::SuggestedWord *sw = queue->top();
- const int score = sw->mScore;
- const int *word = sw->mWord;
- const int wordLength = sw->mWordLength;
- float ns = Correction::RankingAlgorithm::calcNormalizedScore(
- correction->getPrimaryInputWord(), i, word, wordLength, score);
- ns += 0;
- AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns,
- (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD));
- DUMP_WORD(correction->getPrimaryInputWord(), i);
- DUMP_WORD(word, wordLength);
- }
- }
- }
-}
-
-void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
- const int *yCoordinates, const int *codes, const int inputSize,
- Correction *correction) const {
- if (DEBUG_DICT) {
- AKLOGI("initSuggest");
- DUMP_WORD(codes, inputSize);
- }
- correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
- const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
- correction->initCorrection(proximityInfo, inputSize, maxDepth);
-}
-
-void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, const int inputSize,
- Correction *correction, WordsPriorityQueuePool *queuePool) const {
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction);
- getSuggestionCandidates(useFullEditDistance, inputSize, bigramMap, bigramFilter, correction,
- queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
-}
-
-void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
- const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- Correction *correction, WordsPriorityQueuePool *queuePool,
- const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) const {
- uint8_t totalTraverseCount = correction->pushAndGetTotalTraverseCount();
- if (DEBUG_DICT) {
- AKLOGI("Traverse count %d", totalTraverseCount);
- }
- if (totalTraverseCount > MULTIPLE_WORDS_SUGGESTION_MAX_TOTAL_TRAVERSE_COUNT) {
- if (DEBUG_DICT) {
- AKLOGI("Abort traversing %d", totalTraverseCount);
- }
- return;
- }
- // TODO: Remove setCorrectionParams
- correction->setCorrectionParams(0, 0, 0,
- -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance,
- doAutoCompletion, maxErrors);
- int rootPosition = mBinaryDicitonaryInfo->getRootPosition();
- // Get the number of children of root, then increment the position
- int childCount = BinaryFormat::getGroupCountAndForwardPointer(
- mBinaryDicitonaryInfo->getDictRoot(), &rootPosition);
- int outputIndex = 0;
-
- correction->initCorrectionState(rootPosition, childCount, (inputSize <= 0));
-
- // Depth first search
- while (outputIndex >= 0) {
- if (correction->initProcessState(outputIndex)) {
- int siblingPos = correction->getTreeSiblingPos(outputIndex);
- int firstChildPos;
-
- const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
- bigramMap, bigramFilter, correction, &childCount, &firstChildPos, &siblingPos,
- queuePool, currentWordIndex);
- // Update next sibling pos
- correction->setTreeSiblingPos(outputIndex, siblingPos);
-
- if (needsToTraverseChildrenNodes) {
- // Goes to child node
- outputIndex = correction->goDownTree(outputIndex, childCount, firstChildPos);
- }
- } else {
- // Goes to parent sibling node
- outputIndex = correction->getTreeParentIndex(outputIndex);
- }
- }
-}
-
-void UnigramDictionary::onTerminal(const int probability,
- const TerminalAttributes &terminalAttributes, Correction *correction,
- WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
- const int currentWordIndex) const {
- const int inputIndex = correction->getInputIndex();
- const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
-
- int wordLength;
- int *wordPointer;
-
- if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
- WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
- const int finalProbability =
- correction->getFinalProbability(probability, &wordPointer, &wordLength);
-
- if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
- // If the probability is 0, we don't want to add this word. However we still
- // want to add its shortcuts (including a possible whitelist entry) if any.
- // Furthermore, if this is not a word (shortcut only for example) or a blacklisted
- // entry then we never want to suggest this.
- addWord(wordPointer, wordLength, finalProbability, masterQueue,
- Dictionary::KIND_CORRECTION);
- }
-
- const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
- // Please note that the shortcut candidates will be added to the master queue only.
- TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
- while (iterator.hasNextShortcutTarget()) {
- // TODO: addWord only supports weak ordering, meaning we have no means
- // to control the order of the shortcuts relative to one another or to the word.
- // We need to either modulate the probability of each shortcut according
- // to its own shortcut probability or to make the queue
- // so that the insert order is protected inside the queue for words
- // with the same score. For the moment we use -1 to make sure the shortcut will
- // never be in front of the word.
- int shortcutTarget[MAX_WORD_LENGTH];
- int shortcutFrequency;
- const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
- MAX_WORD_LENGTH, shortcutTarget, &shortcutFrequency);
- int shortcutScore;
- int kind;
- if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
- && correction->sameAsTyped()) {
- shortcutScore = S_INT_MAX;
- kind = Dictionary::KIND_WHITELIST;
- } else {
- shortcutScore = shortcutProbability;
- kind = Dictionary::KIND_CORRECTION;
- }
- addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
- masterQueue, kind);
- }
- }
-
- // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH
- // or more length.
- if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) {
- WordsPriorityQueue *subQueue;
- subQueue = queuePool->getSubQueue(currentWordIndex, inputIndex);
- if (!subQueue) {
- return;
- }
- const int finalProbability = correction->getFinalProbabilityForSubQueue(
- probability, &wordPointer, &wordLength, inputIndex);
- addWord(wordPointer, wordLength, finalProbability, subQueue, Dictionary::KIND_CORRECTION);
- }
-}
-
-int UnigramDictionary::getSubStringSuggestion(
- ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
- const int *codes, const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool *queuePool, const int inputSize,
- const bool hasAutoCorrectionCandidate, const int currentWordIndex,
- const int inputWordStartPos, const int inputWordLength,
- const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int *wordLengthArray, int *outputWord, int *outputWordLength) const {
- if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
- return FLAG_MULTIPLE_SUGGEST_ABORT;
- }
-
- /////////////////////////////////////////////
- // safety net for multiple word suggestion //
- // TODO: Remove this safety net //
- /////////////////////////////////////////////
- int smallWordCount = 0;
- int singleLetterWordCount = 0;
- if (inputWordLength == 1) {
- ++singleLetterWordCount;
- }
- if (inputWordLength <= 2) {
- // small word == single letter or 2-letter word
- ++smallWordCount;
- }
- for (int i = 0; i < currentWordIndex; ++i) {
- const int length = wordLengthArray[i];
- if (length == 1) {
- ++singleLetterWordCount;
- // Safety net to avoid suggesting sequential single letter words
- if (i < (currentWordIndex - 1)) {
- if (wordLengthArray[i + 1] == 1) {
- return FLAG_MULTIPLE_SUGGEST_ABORT;
- }
- } else if (inputWordLength == 1) {
- return FLAG_MULTIPLE_SUGGEST_ABORT;
- }
- }
- if (length <= 2) {
- ++smallWordCount;
- }
- // Safety net to avoid suggesting multiple words with many (4 or more, for now) small words
- if (singleLetterWordCount >= 3 || smallWordCount >= 4) {
- return FLAG_MULTIPLE_SUGGEST_ABORT;
- }
- }
- //////////////////////////////////////////////
- // TODO: Remove the safety net above //
- //////////////////////////////////////////////
-
- int *tempOutputWord = 0;
- int nextWordLength = 0;
- // TODO: Optimize init suggestion
- initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
- inputSize, correction);
-
- int word[MAX_WORD_LENGTH];
- int freq = getMostProbableWordLike(
- inputWordStartPos, inputWordLength, correction, word);
- if (freq > 0) {
- nextWordLength = inputWordLength;
- tempOutputWord = word;
- } else if (!hasAutoCorrectionCandidate) {
- if (inputWordStartPos > 0) {
- const int offset = inputWordStartPos;
- initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
- codes + offset, inputWordLength, correction);
- queuePool->clearSubQueue(currentWordIndex);
- // TODO: pass the bigram list for substring suggestion
- getSuggestionCandidates(useFullEditDistance, inputWordLength,
- 0 /* bigramMap */, 0 /* bigramFilter */, correction, queuePool,
- false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
- if (DEBUG_DICT) {
- if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
- AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
- for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
- queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
- }
- }
- }
- }
- WordsPriorityQueue *queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
- // TODO: Return the correct value depending on doAutoCompletion
- if (!queue || queue->size() <= 0) {
- return FLAG_MULTIPLE_SUGGEST_ABORT;
- }
- int score = 0;
- const float ns = queue->getHighestNormalizedScore(
- correction->getPrimaryInputWord(), inputWordLength,
- &tempOutputWord, &score, &nextWordLength);
- if (DEBUG_DICT) {
- AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
- }
- // Two words correction won't be done if the score of the first word doesn't exceed the
- // threshold.
- if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
- || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
- return FLAG_MULTIPLE_SUGGEST_SKIP;
- }
- freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
- }
- if (DEBUG_DICT) {
- AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)",
- currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
- (currentWordIndex > 0) ? wordLengthArray[0] : 0);
- }
- if (freq <= 0 || nextWordLength <= 0
- || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
- return FLAG_MULTIPLE_SUGGEST_SKIP;
- }
- for (int i = 0; i < nextWordLength; ++i) {
- outputWord[outputWordStartPos + i] = tempOutputWord[i];
- }
-
- // Put output values
- freqArray[currentWordIndex] = freq;
- // TODO: put output length instead of input length
- wordLengthArray[currentWordIndex] = inputWordLength;
- const int tempOutputWordLength = outputWordStartPos + nextWordLength;
- if (outputWordLength) {
- *outputWordLength = tempOutputWordLength;
- }
-
- if ((inputWordStartPos + inputWordLength) < inputSize) {
- if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
- return FLAG_MULTIPLE_SUGGEST_SKIP;
- }
- outputWord[tempOutputWordLength] = KEYCODE_SPACE;
- if (outputWordLength) {
- ++*outputWordLength;
- }
- } else if (currentWordIndex >= 1) {
- // TODO: Handle 3 or more words
- const int pairFreq = correction->getFreqForSplitMultipleWords(
- freqArray, wordLengthArray, currentWordIndex + 1, isSpaceProximity, outputWord);
- if (DEBUG_DICT) {
- DUMP_WORD(outputWord, tempOutputWordLength);
- for (int i = 0; i < currentWordIndex + 1; ++i) {
- AKLOGI("Split %d,%d words: freq = %d, length = %d", i, currentWordIndex + 1,
- freqArray[i], wordLengthArray[i]);
- }
- AKLOGI("Split two words: freq = %d, length = %d, %d, isSpace ? %d", pairFreq,
- inputSize, tempOutputWordLength, isSpaceProximity);
- }
- addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue(),
- Dictionary::KIND_CORRECTION);
- }
- return FLAG_MULTIPLE_SUGGEST_CONTINUE;
-}
-
-void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputSize, Correction *correction,
- WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
- const int startInputPos, const int startWordIndex, const int outputWordLength,
- int *freqArray, int *wordLengthArray, int *outputWord) const {
- if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
- // Return if the last word index
- return;
- }
- if (startWordIndex >= 1
- && (hasAutoCorrectionCandidate
- || inputSize < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) {
- // Do not suggest 3+ words if already has auto correction candidate
- return;
- }
- for (int i = startInputPos + 1; i < inputSize; ++i) {
- if (DEBUG_CORRECTION_FREQ) {
- AKLOGI("Multi words(%d), start in %d sep %d start out %d",
- startWordIndex, startInputPos, i, outputWordLength);
- DUMP_WORD(outputWord, outputWordLength);
- }
- int tempOutputWordLength = 0;
- // Current word
- int inputWordStartPos = startInputPos;
- int inputWordLength = i - startInputPos;
- const int suggestionFlag = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates,
- codes, useFullEditDistance, correction, queuePool, inputSize,
- hasAutoCorrectionCandidate, startWordIndex, inputWordStartPos, inputWordLength,
- outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord,
- &tempOutputWordLength);
- if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_ABORT) {
- // TODO: break here
- continue;
- } else if (suggestionFlag == FLAG_MULTIPLE_SUGGEST_SKIP) {
- continue;
- }
-
- if (DEBUG_CORRECTION_FREQ) {
- AKLOGI("Do missing space correction");
- }
- // Next word
- // Missing space
- inputWordStartPos = i;
- inputWordLength = inputSize - i;
- if (getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
- startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
- false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
- != FLAG_MULTIPLE_SUGGEST_CONTINUE) {
- getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputSize, correction, queuePool,
- hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1,
- tempOutputWordLength, freqArray, wordLengthArray, outputWord);
- }
-
- // Mistyped space
- ++inputWordStartPos;
- --inputWordLength;
-
- if (inputWordLength <= 0) {
- continue;
- }
-
- const int x = xcoordinates[inputWordStartPos - 1];
- const int y = ycoordinates[inputWordStartPos - 1];
- if (!proximityInfo->hasSpaceProximity(x, y)) {
- continue;
- }
-
- if (DEBUG_CORRECTION_FREQ) {
- AKLOGI("Do mistyped space correction");
- }
- getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
- startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
- true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0);
- }
-}
-
-void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputSize,
- Correction *correction, WordsPriorityQueuePool *queuePool,
- const bool hasAutoCorrectionCandidate) const {
- if (inputSize >= MAX_WORD_LENGTH) return;
- if (DEBUG_DICT) {
- AKLOGI("--- Suggest multiple words");
- }
-
- // Allocating fixed length array on stack
- int outputWord[MAX_WORD_LENGTH];
- int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
- int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
- const int outputWordLength = 0;
- const int startInputPos = 0;
- const int startWordIndex = 0;
- getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
- useFullEditDistance, inputSize, correction, queuePool, hasAutoCorrectionCandidate,
- startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
- outputWord);
-}
-
-// Wrapper for getMostProbableWordLikeInner, which matches it to the previous
-// interface.
-int UnigramDictionary::getMostProbableWordLike(const int startInputIndex, const int inputSize,
- Correction *correction, int *word) const {
- int inWord[inputSize];
- for (int i = 0; i < inputSize; ++i) {
- inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
- }
- return getMostProbableWordLikeInner(inWord, inputSize, word);
-}
-
-// This function will take the position of a character array within a CharGroup,
-// and check it actually like-matches the word in inWord starting at startInputIndex,
-// that is, it matches it with case and accents squashed.
-// The function returns true if there was a full match, false otherwise.
-// The function will copy on-the-fly the characters in the CharGroup to outNewWord.
-// It will also place the end position of the array in outPos; in outInputIndex,
-// it will place the index of the first char AFTER the match if there was a match,
-// and the initial position if there was not. It makes sense because if there was
-// a match we want to continue searching, but if there was not, we want to go to
-// the next CharGroup.
-// In and out parameters may point to the same location. This function takes care
-// not to use any input parameters after it wrote into its outputs.
-static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
- const uint8_t *const root, const int startPos, const int *const inWord,
- const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
- int *outPos) {
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- int pos = startPos;
- int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- int baseChar = CharUtils::toBaseLowerCase(codePoint);
- const int wChar = CharUtils::toBaseLowerCase(inWord[startInputIndex]);
-
- if (baseChar != wChar) {
- *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
- *outInputIndex = startInputIndex;
- return false;
- }
- int inputIndex = startInputIndex;
- outNewWord[inputIndex] = codePoint;
- if (hasMultipleChars) {
- codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- while (NOT_A_CODE_POINT != codePoint) {
- baseChar = CharUtils::toBaseLowerCase(codePoint);
- if (inputIndex + 1 >= inputSize
- || CharUtils::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
- *outPos = BinaryFormat::skipOtherCharacters(root, pos);
- *outInputIndex = startInputIndex;
- return false;
- }
- outNewWord[inputIndex] = codePoint;
- codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- }
- }
- *outInputIndex = inputIndex + 1;
- *outPos = pos;
- return true;
-}
-
-// This function is invoked when a word like the word searched for is found.
-// It will compare the probability to the max probability, and if greater, will
-// copy the word into the output buffer. In output value maxFreq, it will
-// write the new maximum probability if it changed.
-static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
- int *maxFreq) {
- if (freq > *maxFreq) {
- for (int q = 0; q < length; ++q) {
- outWord[q] = newWord[q];
- }
- outWord[length] = 0;
- *maxFreq = freq;
- }
-}
-
-// Will find the highest probability of the words like the one passed as an argument,
-// that is, everything that only differs by case/accents.
-int UnigramDictionary::getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
- int *outWord) const {
- int newWord[MAX_WORD_LENGTH];
- int depth = 0;
- int maxFreq = -1;
- const uint8_t *const root = mBinaryDicitonaryInfo->getDictRoot();
- int stackChildCount[MAX_WORD_LENGTH];
- int stackInputIndex[MAX_WORD_LENGTH];
- int stackSiblingPos[MAX_WORD_LENGTH];
-
- int startPos = 0;
- stackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos);
- stackInputIndex[0] = 0;
- stackSiblingPos[0] = startPos;
- while (depth >= 0) {
- const int charGroupCount = stackChildCount[depth];
- int pos = stackSiblingPos[depth];
- for (int charGroupIndex = charGroupCount - 1; charGroupIndex >= 0; --charGroupIndex) {
- int inputIndex = stackInputIndex[depth];
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- // Test whether all chars in this group match with the word we are searching for. If so,
- // we want to traverse its children (or if the inputSize match, evaluate its
- // probability). Note that this function will output the position regardless, but will
- // only write into inputIndex if there is a match.
- const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
- inputIndex, inputSize, newWord, &inputIndex, &pos);
- if (isAlike && (!(BinaryFormat::FLAG_IS_NOT_A_WORD & flags))
- && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
- const int probability =
- BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
- onTerminalWordLike(probability, newWord, inputIndex, outWord, &maxFreq);
- }
- pos = BinaryFormat::skipProbability(flags, pos);
- const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
- const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
- // If we had a match and the word has children, we want to traverse them. We don't have
- // to traverse words longer than the one we are searching for, since they will not match
- // anyway, so don't traverse unless inputIndex < inputSize.
- if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
- // Save position for this depth, to get back to this once children are done
- stackChildCount[depth] = charGroupIndex;
- stackSiblingPos[depth] = siblingPos;
- // Prepare stack values for next depth
- ++depth;
- int childrenPos = childrenNodePos;
- stackChildCount[depth] =
- BinaryFormat::getGroupCountAndForwardPointer(root, &childrenPos);
- stackSiblingPos[depth] = childrenPos;
- stackInputIndex[depth] = inputIndex;
- pos = childrenPos;
- // Go to the next depth level.
- ++depth;
- break;
- } else {
- // No match, or no children, or word too long to ever match: go the next sibling.
- pos = siblingPos;
- }
- }
- --depth;
- }
- return maxFreq;
-}
-
-int UnigramDictionary::getProbability(const int *const inWord, const int length) const {
- const uint8_t *const root = mBinaryDicitonaryInfo->getDictRoot();
- int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
- false /* forceLowerCaseSearch */);
- if (NOT_VALID_WORD == pos) {
- return NOT_A_PROBABILITY;
- }
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
- // If this is not a word, or if it's a blacklisted entry, it should behave as
- // having no probability outside of the suggestion process (where it should be used
- // for shortcuts).
- return NOT_A_PROBABILITY;
- }
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- if (hasMultipleChars) {
- pos = BinaryFormat::skipOtherCharacters(root, pos);
- } else {
- BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- }
- const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
- return unigramProbability;
-}
-
-// TODO: remove this function.
-int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
- return -1;
-}
-
-// ProcessCurrentNode returns a boolean telling whether to traverse children nodes or not.
-// If the return value is false, then the caller should read in the output "nextSiblingPosition"
-// to find out the address of the next sibling node and pass it to a new call of processCurrentNode.
-// It is worthy to note that when false is returned, the output values other than
-// nextSiblingPosition are undefined.
-// If the return value is true, then the caller must proceed to traverse the children of this
-// node. processCurrentNode will output the information about the children: their count in
-// newCount, their position in newChildrenPosition, the traverseAllNodes flag in
-// newTraverseAllNodes, the match weight into newMatchRate, the input index into newInputIndex, the
-// diffs into newDiffs, the sibling position in nextSiblingPosition, and the output index into
-// newOutputIndex. Please also note the following caveat: processCurrentNode does not know when
-// there aren't any more nodes at this level, it merely returns the address of the first byte after
-// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
-// given level, as output into newCount when traversing this level's parent.
-bool UnigramDictionary::processCurrentNode(const int initialPos,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
- int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
- WordsPriorityQueuePool *queuePool, const int currentWordIndex) const {
- if (DEBUG_DICT) {
- correction->checkState();
- }
- int pos = initialPos;
-
- // Flags contain the following information:
- // - Address type (MASK_GROUP_ADDRESS_TYPE) on two bits:
- // - FLAG_GROUP_ADDRESS_TYPE_{ONE,TWO,THREE}_BYTES means there are children and their address
- // is on the specified number of bytes.
- // - FLAG_GROUP_ADDRESS_TYPE_NOADDRESS means there are no children, and therefore no address.
- // - FLAG_HAS_MULTIPLE_CHARS: whether this node has multiple char or not.
- // - FLAG_IS_TERMINAL: whether this node is a terminal or not (it may still have children)
- // - FLAG_HAS_BIGRAMS: whether this node has bigrams or not
- const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
- mBinaryDicitonaryInfo->getDictRoot(), &pos);
- const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
- const bool isTerminalNode = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
-
- bool needsToInvokeOnTerminal = false;
-
- // This gets only ONE character from the stream. Next there will be:
- // if FLAG_HAS_MULTIPLE CHARS: the other characters of the same node
- // else if FLAG_IS_TERMINAL: the probability
- // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
- // Note that you can't have a node that both is not a terminal and has no children.
- int c = BinaryFormat::getCodePointAndForwardPointer(
- mBinaryDicitonaryInfo->getDictRoot(), &pos);
- ASSERT(NOT_A_CODE_POINT != c);
-
- // We are going to loop through each character and make it look like it's a different
- // node each time. To do that, we will process characters in this node in order until
- // we find the character terminator. This is signalled by getCodePoint* returning
- // NOT_A_CODE_POINT.
- // As a special case, if there is only one character in this node, we must not read the
- // next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
- // This way, each loop run will look like a "virtual node".
- do {
- // We prefetch the next char. If 'c' is the last char of this node, we will have
- // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
- // should behave as a terminal or not and whether we have children.
- const int nextc = hasMultipleChars ? BinaryFormat::getCodePointAndForwardPointer(
- mBinaryDicitonaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
- const bool isLastChar = (NOT_A_CODE_POINT == nextc);
- // If there are more chars in this nodes, then this virtual node is not a terminal.
- // If we are on the last char, this virtual node is a terminal if this node is.
- const bool isTerminal = isLastChar && isTerminalNode;
-
- Correction::CorrectionType stateType = correction->processCharAndCalcState(
- c, isTerminal);
- if (stateType == Correction::TRAVERSE_ALL_ON_TERMINAL
- || stateType == Correction::ON_TERMINAL) {
- needsToInvokeOnTerminal = true;
- } else if (stateType == Correction::UNRELATED || correction->needsToPrune()) {
- // We found that this is an unrelated character, so we should give up traversing
- // this node and its children entirely.
- // However we may not be on the last virtual node yet so we skip the remaining
- // characters in this node, the probability if it's there, read the next sibling
- // position to output it, then return false.
- // We don't have to output other values because we return false, as in
- // "don't traverse children".
- if (!isLastChar) {
- pos = BinaryFormat::skipOtherCharacters(mBinaryDicitonaryInfo->getDictRoot(), pos);
- }
- pos = BinaryFormat::skipProbability(flags, pos);
- *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
- mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
- return false;
- }
-
- // Prepare for the next character. Promote the prefetched char to current char - the loop
- // will take care of prefetching the next. If we finally found our last char, nextc will
- // contain NOT_A_CODE_POINT.
- c = nextc;
- } while (NOT_A_CODE_POINT != c);
-
- if (isTerminalNode) {
- // The probability should be here, because we come here only if this is actually
- // a terminal node, and we are on its last char.
- const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(
- mBinaryDicitonaryInfo->getDictRoot(), pos);
- const int childrenAddressPos = BinaryFormat::skipProbability(flags, pos);
- const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
- TerminalAttributes terminalAttributes(mBinaryDicitonaryInfo, flags, attributesPos);
- // bigramMap contains the bigram frequencies indexed by addresses for fast lookup.
- // bigramFilter is a bloom filter of said frequencies for even faster rejection.
- const int probability = ProbabilityUtils::getProbability(
- initialPos, bigramMap, bigramFilter, unigramProbability);
- onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
- currentWordIndex);
-
- // If there are more chars in this node, then this virtual node has children.
- // If we are on the last char, this virtual node has children if this node has.
- const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
-
- // This character matched the typed character (enough to traverse the node at least)
- // so we just evaluated it. Now we should evaluate this virtual node's children - that
- // is, if it has any. If it has no children, we're done here - so we skip the end of
- // the node, output the siblings position, and return false "don't traverse children".
- // Note that !hasChildren implies isLastChar, so we know we don't have to skip any
- // remaining char in this group for there can't be any.
- if (!hasChildren) {
- pos = BinaryFormat::skipProbability(flags, pos);
- *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
- mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
- return false;
- }
-
- // Optimization: Prune out words that are too long compared to how much was typed.
- if (correction->needsToPrune()) {
- pos = BinaryFormat::skipProbability(flags, pos);
- *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
- mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
- if (DEBUG_DICT_FULL) {
- AKLOGI("Traversing was pruned.");
- }
- return false;
- }
- }
-
- // Now we finished processing this node, and we want to traverse children. If there are no
- // children, we can't come here.
- ASSERT(BinaryFormat::hasChildrenInFlags(flags));
-
- // If this node was a terminal it still has the probability under the pointer (it may have been
- // read, but not skipped - see readProbabilityWithoutMovingPointer).
- // Next come the children position, then possibly attributes (attributes are bigrams only for
- // now, maybe something related to shortcuts in the future).
- // Once this is read, we still need to output the number of nodes in the immediate children of
- // this node, so we read and output it before returning true, as in "please traverse children".
- pos = BinaryFormat::skipProbability(flags, pos);
- int childrenPos = BinaryFormat::readChildrenPosition(
- mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
- *nextSiblingPosition = BinaryFormat::skipChildrenPosAndAttributes(
- mBinaryDicitonaryInfo->getDictRoot(), flags, pos);
- *newCount = BinaryFormat::getGroupCountAndForwardPointer(
- mBinaryDicitonaryInfo->getDictRoot(), &childrenPos);
- *newChildrenPosition = childrenPos;
- return true;
-}
-} // namespace latinime
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
deleted file mode 100644
index 4edd1f847..000000000
--- a/native/jni/src/unigram_dictionary.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_UNIGRAM_DICTIONARY_H
-#define LATINIME_UNIGRAM_DICTIONARY_H
-
-#include <map>
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/dictionary/digraph_utils.h"
-
-namespace latinime {
-
-class BinaryDictionaryInfo;
-class Correction;
-class ProximityInfo;
-class TerminalAttributes;
-class WordsPriorityQueuePool;
-
-// Declaration of the unigram dictionary: probability lookup, bigram position lookup and
-// suggestion generation on top of a BinaryDictionaryInfo. Only the declarations are visible
-// here, so notes that go beyond what the signatures show are marked as assumptions.
-class UnigramDictionary {
- public:
- // Error tolerances
- // NOTE(review): presumably the number of typing errors tolerated for a single word and for a
- // two-word split respectively -- confirm against the implementation.
- static const int DEFAULT_MAX_ERRORS = 2;
- static const int MAX_ERRORS_FOR_TWO_WORDS = 1;
-
- // NOTE(review): these look like status codes of the multiple-word suggestion path
- // (abort / skip / continue); their consumer is not visible in this header.
- static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
- static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
- static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
-
- // Takes the dictionary info to read from and the dictionary flags.
- // NOTE(review): presumably stored in mBinaryDicitonaryInfo and DICT_FLAGS -- confirm in the
- // constructor definition.
- UnigramDictionary(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
- const uint8_t dictFlags);
- virtual ~UnigramDictionary();
- // Looks up |inWord| (|length| ints). The value returned for unknown words is not visible
- // here -- TODO confirm.
- int getProbability(const int *const inWord, const int length) const;
- int getBigramPosition(int pos, int *word, int offset, int length) const;
- // Public suggestion entry point. The last three pointer parameters are non-const, so they
- // are presumably output arrays and the int result a word count -- TODO confirm.
- int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *inputCodePoints, const int inputSize,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, int *outWords, int *frequencies,
- int *outputTypes) const;
- // Returns the dictionary flags held in DICT_FLAGS.
- int getDictFlags() const { return DICT_FLAGS; }
-
- private:
- // Macro from defines.h (not shown); by its name it suppresses the implicitly generated
- // constructors.
- DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary);
- // Internal helpers for suggestion generation. All of them are const member functions that
- // work through the Correction / WordsPriorityQueuePool pointers or int* buffers passed in.
- void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *inputCodePoints, const int inputSize,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool *queuePool) const;
- int getDigraphReplacement(const int *codes, const int i, const int inputSize,
- const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const;
- // NOTE(review): the "Rec" suffix and the currentDepth parameter suggest a recursive search,
- // presumably bounded by MAX_DIGRAPH_SEARCH_DEPTH -- confirm in the definition.
- void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer,
- int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap,
- const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc,
- const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
- WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs,
- const unsigned int digraphsSize) const;
- void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const int inputSize,
- Correction *correction) const;
- void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
- const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize,
- Correction *correction, WordsPriorityQueuePool *queuePool) const;
- void getSuggestionCandidates(
- const bool useFullEditDistance, const int inputSize,
- const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion,
- const int maxErrors, const int currentWordIndex) const;
- void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const bool useFullEditDistance,
- const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
- const bool hasAutoCorrectionCandidate) const;
- void onTerminal(const int freq, const TerminalAttributes &terminalAttributes,
- Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
- const int currentWordIndex) const;
- // Process a node by considering proximity, missing and excessive character
- // The three int* parameters are outputs: the definition writes *newCount and
- // *newChildPosition before returning true, and *nextSiblingPosition on the visible paths.
- bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
- const uint8_t *bigramFilter, Correction *correction, int *newCount,
- int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
- const int currentWordIndex) const;
- int getMostProbableWordLike(const int startInputIndex, const int inputSize,
- Correction *correction, int *word) const;
- int getMostProbableWordLikeInner(const int *const inWord, const int inputSize,
- int *outWord) const;
- int getSubStringSuggestion(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const bool useFullEditDistance,
- Correction *correction, WordsPriorityQueuePool *queuePool, const int inputSize,
- const bool hasAutoCorrectionCandidate, const int currentWordIndex,
- const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos,
- const bool isSpaceProximity, int *freqArray, int *wordLengthArray, int *outputWord,
- int *outputWordLength) const;
- void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
- const int *ycoordinates, const int *codes, const bool useFullEditDistance,
- const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
- const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
- const int outputWordLength, int *freqArray, int *wordLengthArray,
- int *outputWord) const;
-
- // Dictionary data source; the node-processing code obtains the dictionary root through
- // getDictRoot() on it. The misspelling "Dicitonary" is the identifier's real name.
- const BinaryDictionaryInfo *const mBinaryDicitonaryInfo;
- // NOTE(review): presumably limits the digraph search recursion; its initializer is not
- // visible in this header.
- const int MAX_DIGRAPH_SEARCH_DEPTH;
- // Value returned by getDictFlags().
- const int DICT_FLAGS;
-};
-} // namespace latinime
-#endif // LATINIME_UNIGRAM_DICTIONARY_H
diff --git a/native/jni/src/words_priority_queue.cpp b/native/jni/src/words_priority_queue.cpp
deleted file mode 100644
index 7e18d0f87..000000000
--- a/native/jni/src/words_priority_queue.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2012, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "words_priority_queue.h"
-
-#include <vector>
-
-namespace latinime {
-
-// Empties the queue into the caller-provided output arrays and returns how many words were
-// written.
-//
-// Entries are popped lowest score first (wordComparator makes top() the lowest-scored entry)
-// and stored back to front, so the staging buffer ends up ordered by descending score. When at
-// least two entries were popped, the entry with the highest normalized score with respect to
-// |before| is moved to index 0 and the entries that preceded it shift down by one; ties go to
-// the later index.
-//
-// before, beforeLength: word handed to the normalized-score computation, and its length.
-// frequencies:          receives the score of each emitted word.
-// outputCodePoints:     receives the words, one slot of MAX_WORD_LENGTH ints per word; a word
-//                       shorter than MAX_WORD_LENGTH is zero-terminated.
-// outputTypes:          receives the type of each emitted word.
-// Returns the number of entries taken from the queue (at most MAX_WORDS).
-int WordsPriorityQueue::outputSuggestions(const int *before, const int beforeLength,
-        int *frequencies, int *outputCodePoints, int* outputTypes) {
-    mHighestSuggestedWord = 0;
-    const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size()));
-    if (size <= 0) {
-        // Nothing queued: there is nothing to reorder or copy.
-        return 0;
-    }
-    // A value-initialized vector replaces the former variable-length array. Variable-length
-    // arrays are a compiler extension in C++ and start out uninitialized, which made the null
-    // checks below test indeterminate values. Every slot now starts as a null pointer.
-    std::vector<SuggestedWord *> swBuffer(size);
-    int index = size - 1;
-    while (!mSuggestions.empty() && index >= 0) {
-        SuggestedWord *sw = mSuggestions.top();
-        if (DEBUG_WORDS_PRIORITY_QUEUE) {
-            AKLOGI("dump word. %d", sw->mScore);
-            DUMP_WORD(sw->mWord, sw->mWordLength);
-        }
-        swBuffer[index] = sw;
-        mSuggestions.pop();
-        --index;
-    }
-    if (size >= 2) {
-        SuggestedWord *nsMaxSw = 0;
-        int maxIndex = 0;
-        float maxNs = 0;
-        for (int i = 0; i < size; ++i) {
-            SuggestedWord *tempSw = swBuffer[i];
-            if (!tempSw) {
-                continue;
-            }
-            const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0);
-            // ">=" on purpose: with equal normalized scores the later entry wins.
-            if (tempNs >= maxNs) {
-                maxNs = tempNs;
-                maxIndex = i;
-                nsMaxSw = tempSw;
-            }
-        }
-        if (maxIndex > 0 && nsMaxSw) {
-            // Shift entries [0, maxIndex) down by one slot, then put the winner in front.
-            memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(swBuffer[0]));
-            swBuffer[0] = nsMaxSw;
-        }
-    }
-    for (int i = 0; i < size; ++i) {
-        SuggestedWord *sw = swBuffer[i];
-        if (!sw) {
-            AKLOGE("SuggestedWord is null %d", i);
-            continue;
-        }
-        const int wordLength = sw->mWordLength;
-        int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH;
-        frequencies[i] = sw->mScore;
-        outputTypes[i] = sw->mType;
-        memcpy(targetAddress, sw->mWord, wordLength * sizeof(targetAddress[0]));
-        if (wordLength < MAX_WORD_LENGTH) {
-            targetAddress[wordLength] = 0;
-        }
-        // Hand the slot back to the pool now that its contents have been copied out.
-        sw->mUsed = false;
-    }
-    return size;
-}
-} // namespace latinime
diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h
deleted file mode 100644
index 54e8007a2..000000000
--- a/native/jni/src/words_priority_queue.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H
-#define LATINIME_WORDS_PRIORITY_QUEUE_H
-
-#include <cstring> // for memcpy()
-#include <queue>
-
-#include "correction.h"
-#include "defines.h"
-
-namespace latinime {
-
-// Bounded priority queue of suggestion candidates. The queue keeps at most MAX_WORDS entries;
-// wordComparator orders it so that top() is the entry with the lowest score, which is the one
-// evicted when a better candidate arrives. Entries live in a fixed pool of SuggestedWord slots
-// owned by the queue, so pushing never allocates.
-class WordsPriorityQueue {
- public:
-    // One candidate: its code points, length, score and type. mUsed marks the pool slot as
-    // currently held by the queue.
-    struct SuggestedWord {
-        int mScore;
-        int mWord[MAX_WORD_LENGTH];
-        int mWordLength;
-        bool mUsed;
-        int mType;
-
-        // Fills the slot and marks it used. The length is clamped to [0, MAX_WORD_LENGTH] so
-        // that the copy can never run past mWord.
-        void setParams(int score, int *word, int wordLength, int type) {
-            mScore = score;
-            if (wordLength < 0) {
-                mWordLength = 0;
-            } else if (wordLength > MAX_WORD_LENGTH) {
-                mWordLength = MAX_WORD_LENGTH;
-            } else {
-                mWordLength = wordLength;
-            }
-            memcpy(mWord, word, sizeof(mWord[0]) * mWordLength);
-            mUsed = true;
-            mType = type;
-        }
-    };
-
-    // Creates a queue holding at most |maxWords| candidates. The slot pool is sized by the
-    // queue capacity (it used to be sized by MAX_WORD_LENGTH, an unrelated constant, which
-    // made queues larger than that silently drop pushes).
-    WordsPriorityQueue(int maxWords)
-            : mSuggestions(), MAX_WORDS(maxWords),
-              mSuggestedWords(new SuggestedWord[maxWords > 0 ? maxWords : 0]),
-              mHighestSuggestedWord(0) {
-        for (int i = 0; i < MAX_WORDS; ++i) {
-            mSuggestedWords[i].mUsed = false;
-        }
-    }
-
-    // Non virtual inline destructor -- never inherit this class
-    AK_FORCE_INLINE ~WordsPriorityQueue() {
-        delete[] mSuggestedWords;
-    }
-
-    // Offers a candidate to the queue. When the queue is full the candidate is dropped unless
-    // its score is strictly higher than the current lowest score, in which case it takes over
-    // the slot of that lowest-scored entry.
-    void push(int score, int *word, int wordLength, int type) {
-        if (MAX_WORDS <= 0) {
-            // A queue without capacity accepts nothing; also keeps top() off an empty queue.
-            return;
-        }
-        SuggestedWord *sw = 0;
-        if (size() >= MAX_WORDS) {
-            sw = mSuggestions.top();
-            const int minScore = sw->mScore;
-            if (minScore >= score) {
-                return;
-            }
-            sw->mUsed = false;
-            mSuggestions.pop();
-        }
-        if (sw == 0) {
-            sw = getFreeSuggestedWord(score, word, wordLength, type);
-        } else {
-            // Reuse the slot of the entry that was just evicted.
-            sw->setParams(score, word, wordLength, type);
-        }
-        if (sw == 0) {
-            AKLOGE("SuggestedWord is accidentally null.");
-            return;
-        }
-        if (DEBUG_WORDS_PRIORITY_QUEUE) {
-            AKLOGI("Push word. %d, %d", score, wordLength);
-            DUMP_WORD(word, wordLength);
-        }
-        mSuggestions.push(sw);
-        if (!mHighestSuggestedWord || mHighestSuggestedWord->mScore < sw->mScore) {
-            mHighestSuggestedWord = sw;
-        }
-    }
-
-    // Returns the lowest-scored entry, or 0 when the queue is empty.
-    SuggestedWord *top() const {
-        if (mSuggestions.empty()) return 0;
-        SuggestedWord *sw = mSuggestions.top();
-        return sw;
-    }
-
-    // Number of candidates currently queued.
-    int size() const {
-        return static_cast<int>(mSuggestions.size());
-    }
-
-    // Removes every candidate and releases its pool slot.
-    AK_FORCE_INLINE void clear() {
-        mHighestSuggestedWord = 0;
-        while (!mSuggestions.empty()) {
-            SuggestedWord *sw = mSuggestions.top();
-            if (DEBUG_WORDS_PRIORITY_QUEUE) {
-                AKLOGI("Clear word. %d", sw->mScore);
-                DUMP_WORD(sw->mWord, sw->mWordLength);
-            }
-            sw->mUsed = false;
-            mSuggestions.pop();
-        }
-    }
-
-    // Dumps the highest-scored candidate, if there is one.
-    AK_FORCE_INLINE void dumpTopWord() const {
-        if (size() <= 0 || !mHighestSuggestedWord) {
-            return;
-        }
-        DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
-    }
-
-    // Returns the normalized score of the highest-scored candidate with respect to |before|,
-    // or 0.0f when there is none. Non-null out pointers receive that candidate's word, score
-    // and length; they are left untouched when there is no candidate.
-    AK_FORCE_INLINE float getHighestNormalizedScore(const int *before, const int beforeLength,
-            int **outWord, int *outScore, int *outLength) const {
-        if (!mHighestSuggestedWord) {
-            return 0.0f;
-        }
-        return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
-                outLength);
-    }
-
-    int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
-            int *outputCodePoints, int* outputTypes);
-
- private:
-    DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue);
-    // Orders the underlying std::priority_queue so that the lowest score is on top.
-    struct wordComparator {
-        bool operator ()(SuggestedWord * left, SuggestedWord * right) const {
-            return left->mScore > right->mScore;
-        }
-    };
-
-    // Claims the first unused pool slot, fills it and returns it; returns 0 when every slot is
-    // in use.
-    SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) const {
-        for (int i = 0; i < MAX_WORDS; ++i) {
-            if (!mSuggestedWords[i].mUsed) {
-                mSuggestedWords[i].setParams(score, word, wordLength, type);
-                return &mSuggestedWords[i];
-            }
-        }
-        return 0;
-    }
-
-    // Reports |sw|'s word, score and length through the non-null out pointers and returns the
-    // value computed by Correction::RankingAlgorithm::calcNormalizedScore for it.
-    static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
-            int **outWord, int *outScore, int *outLength) {
-        const int score = sw->mScore;
-        int *word = sw->mWord;
-        const int wordLength = sw->mWordLength;
-        if (outScore) {
-            *outScore = score;
-        }
-        if (outWord) {
-            *outWord = word;
-        }
-        if (outLength) {
-            *outLength = wordLength;
-        }
-        return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
-                wordLength, score);
-    }
-
-    typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
-            wordComparator> Suggestions;
-    Suggestions mSuggestions;
-    // Capacity of the queue and of the slot pool. Declared before mSuggestedWords, so it is
-    // initialized first.
-    const int MAX_WORDS;
-    // Pool of MAX_WORDS slots backing the pointers stored in mSuggestions.
-    SuggestedWord *mSuggestedWords;
-    // Highest-scored candidate pushed since the last clear()/outputSuggestions(), or 0.
-    SuggestedWord *mHighestSuggestedWord;
-};
-} // namespace latinime
-#endif // LATINIME_WORDS_PRIORITY_QUEUE_H
diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h
deleted file mode 100644
index 2cd210a05..000000000
--- a/native/jni/src/words_priority_queue_pool.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
-#define LATINIME_WORDS_PRIORITY_QUEUE_POOL_H
-
-#include "defines.h"
-#include "words_priority_queue.h"
-
-namespace latinime {
-
-// Owns one master WordsPriorityQueue plus a grid of
-// MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT sub queues. All queues are
-// constructed with placement new inside buffers embedded in this object, so creating the pool
-// performs no separate allocation for the queue objects themselves.
-class WordsPriorityQueuePool {
- public:
-    // Builds the master queue with capacity |mainQueueMaxWords| and every sub queue with
-    // capacity |subQueueMaxWords|.
-    WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords)
-            // Note: using placement new() requires the caller to call the destructor explicitly.
-            : mMasterQueue(new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords)) {
-        // NOTE(review): the backing buffers are plain char arrays, which carry no alignment
-        // guarantee for WordsPriorityQueue; this relies on the member layout -- confirm.
-        for (int i = 0, subQueueBufOffset = 0;
-                i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
-                ++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) {
-            mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
-                    WordsPriorityQueue(subQueueMaxWords);
-        }
-    }
-
-    // Non virtual inline destructor -- never inherit this class
-    ~WordsPriorityQueuePool() {
-        // Note: these explicit calls to the destructor match the calls to placement new() above.
-        if (mMasterQueue) mMasterQueue->~WordsPriorityQueue();
-        for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i) {
-            if (mSubQueues[i]) mSubQueues[i]->~WordsPriorityQueue();
-        }
-    }
-
-    WordsPriorityQueue *getMasterQueue() const {
-        return mMasterQueue;
-    }
-
-    // Returns the sub queue for (|wordIndex|, |inputWordLength|), or 0 when either index is
-    // out of range. Negative word indices are rejected as well; they used to index before the
-    // start of mSubQueues.
-    WordsPriorityQueue *getSubQueue(const int wordIndex, const int inputWordLength) const {
-        if (wordIndex < 0 || wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
-            return 0;
-        }
-        if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) {
-            if (DEBUG_WORDS_PRIORITY_QUEUE) {
-                ASSERT(false);
-            }
-            return 0;
-        }
-        return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength];
-    }
-
-    // Empties the master queue and every sub queue.
-    inline void clearAll() {
-        mMasterQueue->clear();
-        for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) {
-            clearSubQueue(i);
-        }
-    }
-
-    // Empties all sub queues that belong to |wordIndex|.
-    AK_FORCE_INLINE void clearSubQueue(const int wordIndex) {
-        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
-            WordsPriorityQueue *queue = getSubQueue(wordIndex, i);
-            if (queue) {
-                queue->clear();
-            }
-        }
-    }
-
-    // Debug helper: dumps the top word of every sub queue of word index 0.
-    void dumpSubQueue1TopSuggestions() const {
-        AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
-        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
-            const WordsPriorityQueue *queue = getSubQueue(0, i);
-            if (queue) {
-                queue->dumpTopWord();
-            }
-        }
-    }
-
- private:
-    DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueuePool);
-    // Raw storage the queues are placement-constructed into.
-    char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
-    char mSubQueueBuf[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
-            * sizeof(WordsPriorityQueue)];
-    WordsPriorityQueue *mMasterQueue;
-    // Indexed as wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength.
-    WordsPriorityQueue *mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
-};
-} // namespace latinime
-#endif // LATINIME_WORDS_PRIORITY_QUEUE_POOL_H