aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk47
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp146
-rw-r--r--native/jni/src/defines.h36
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp42
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h16
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h41
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp7
-rw-r--r--native/jni/src/suggest/core/dictionary/bloom_filter.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp73
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h43
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.cpp12
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp34
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h69
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp71
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h67
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.cpp52
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.h87
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.cpp16
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.h11
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.cpp6
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h22
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp5
-rw-r--r--native/jni/src/suggest/core/policy/weighting.h3
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp8
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h27
-rw-r--r--native/jni/src/suggest/core/suggest.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h35
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp391
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h92
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp240
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp191
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp124
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp380
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h121
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp558
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h138
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h110
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp132
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h)15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp144
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h)88
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp)169
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h)190
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp)26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h)22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp294
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp)61
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h)25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h229
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp)29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h)36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h)1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp202
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h102
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp160
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h63
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp175
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h90
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h111
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp100
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h61
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp138
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h137
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp71
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp411
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp352
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h140
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp285
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp85
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp157
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp167
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h57
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp96
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h61
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h4
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp47
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
-rw-r--r--native/jni/src/utils/exclusive_ownership_pointer.h81
-rw-r--r--native/jni/src/utils/time_keeper.cpp41
-rw-r--r--native/jni/src/utils/time_keeper.h41
122 files changed, 7183 insertions, 3406 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index ca6a77997..e11e706f3 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -57,7 +57,9 @@ LATIN_IME_CORE_SRC_FILES := \
bloom_filter.cpp \
dictionary.cpp \
digraph_utils.cpp \
- multi_bigram_map.cpp) \
+ error_type_utils.cpp \
+ multi_bigram_map.cpp \
+ unigram_property.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \
proximity_info.cpp \
@@ -67,27 +69,45 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
- bigram/bigram_list_read_write_utils.cpp \
- bigram/dynamic_bigram_list_policy.cpp \
header/header_policy.cpp \
header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
- dictionary_structure_with_buffer_policy_factory.cpp \
- dynamic_patricia_trie_gc_event_listeners.cpp \
- dynamic_patricia_trie_node_reader.cpp \
- dynamic_patricia_trie_policy.cpp \
- dynamic_patricia_trie_reading_helper.cpp \
- dynamic_patricia_trie_reading_utils.cpp \
- dynamic_patricia_trie_writing_helper.cpp \
- dynamic_patricia_trie_writing_utils.cpp \
+ structure/dictionary_structure_with_buffer_policy_factory.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/bigram/, \
+ bigram_list_read_write_utils.cpp \
+ ver4_bigram_list_policy.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/structure/pt_common/, \
+ dynamic_pt_gc_event_listeners.cpp \
+ dynamic_pt_reading_helper.cpp \
+ dynamic_pt_reading_utils.cpp \
+ dynamic_pt_updating_helper.cpp \
+ dynamic_pt_writing_utils.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
+ ver4_dict_buffers.cpp \
+ ver4_dict_constants.cpp \
+ ver4_patricia_trie_node_reader.cpp \
+ ver4_patricia_trie_node_writer.cpp \
+ ver4_patricia_trie_policy.cpp \
+ ver4_patricia_trie_reading_utils.cpp \
+ ver4_patricia_trie_writing_helper.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
+ bigram_dict_content.cpp \
+ probability_dict_content.cpp \
+ shortcut_dict_content.cpp \
+ sparse_table_dict_content.cpp \
+ terminal_position_lookup_table.cpp) \
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \
dict_file_writing_utils.cpp \
+ file_utils.cpp \
forgetting_curve_utils.cpp \
- format_utils.cpp) \
+ format_utils.cpp \
+ mmapped_buffer.cpp \
+ sparse_table.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \
@@ -98,7 +118,8 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix utils/, \
autocorrection_threshold_utils.cpp \
char_utils.cpp \
- log_utils.cpp)
+ log_utils.cpp \
+ time_keeper.cpp)
LOCAL_SRC_FILES := \
$(LATIN_IME_JNI_SRC_FILES) \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 8f21c50ec..716bda5a7 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -24,8 +24,9 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
+#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/suggest_options.h"
-#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "utils/autocorrection_threshold_utils.h"
@@ -86,11 +87,11 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy =
+ DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE);
- if (!dictionaryStructureWithBufferPolicy) {
+ if (!dictionaryStructureWithBufferPolicy.get()) {
return 0;
}
@@ -135,6 +136,12 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic
delete dictionary;
}
+static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return 0;
+ return dictionary->getFormatVersionNumber();
+}
+
static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
@@ -252,6 +259,21 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
word1Length);
}
+static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz,
+ jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
+ jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) return;
+ const jsize wordLength = env->GetArrayLength(word);
+ int wordCodePoints[wordLength];
+ env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
+ const UnigramProperty unigramProperty = dictionary->getUnigramProperty(
+ wordCodePoints, wordLength);
+ unigramProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
+ outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
+}
+
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before);
@@ -277,7 +299,8 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji
}
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word, jint probability) {
+ jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability,
+ jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -285,11 +308,17 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
- dictionary->addUnigramWord(codePoints, wordLength, probability);
+ jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
+ int shortcutTargetCodePoints[shortcutLength];
+ if (shortcutTarget) {
+ env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
+ }
+ dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
+ shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jintArray word1, jint probability) {
+ jintArray word0, jintArray word1, jint probability, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -301,7 +330,7 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
- word1Length, probability);
+ word1Length, probability, timestamp);
}
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@@ -320,6 +349,87 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length);
}
+// Returns how many language model params are processed.
+static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
+ jlong dict, jobjectArray languageModelParams, jint startIndex) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return 0;
+ }
+ jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
+ if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) {
+ return 0;
+ }
+ jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
+ jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
+ env->DeleteLocalRef(languageModelParam);
+
+ jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
+ jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
+ jfieldID unigramProbabilityFieldId =
+ env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
+ jfieldID bigramProbabilityFieldId =
+ env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
+ jfieldID timestampFieldId =
+ env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
+ jfieldID shortcutTargetFieldId =
+ env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
+ jfieldID shortcutProbabilityFieldId =
+ env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
+ jfieldID isNotAWordFieldId =
+ env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
+ jfieldID isBlacklistedFieldId =
+ env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
+ env->DeleteLocalRef(languageModelParamClass);
+
+ for (int i = startIndex; i < languageModelParamCount; ++i) {
+ jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
+ // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the
+ // other hand, word0 can be null and then it means the set of params doesn't contain bigram
+ // information.
+ jintArray word0 = static_cast<jintArray>(
+ env->GetObjectField(languageModelParam, word0FieldId));
+ jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
+ int word0CodePoints[word0Length];
+ if (word0) {
+ env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
+ }
+ jintArray word1 = static_cast<jintArray>(
+ env->GetObjectField(languageModelParam, word1FieldId));
+ jsize word1Length = env->GetArrayLength(word1);
+ int word1CodePoints[word1Length];
+ env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
+ jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
+ jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
+ jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
+ jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
+ jintArray shortcutTarget = static_cast<jintArray>(
+ env->GetObjectField(languageModelParam, shortcutTargetFieldId));
+ jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
+ int shortcutTargetCodePoints[shortcutLength];
+ if (shortcutTarget) {
+ env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
+ }
+ jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
+ dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
+ shortcutTargetCodePoints, shortcutLength, shortcutProbability,
+ isNotAWord, isBlacklisted, timestamp);
+ if (word0) {
+ jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
+ dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
+ bigramProbability, timestamp);
+ }
+ if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
+ return i + 1;
+ }
+ env->DeleteLocalRef(word0);
+ env->DeleteLocalRef(word1);
+ env->DeleteLocalRef(shortcutTarget);
+ env->DeleteLocalRef(languageModelParam);
+ }
+ return languageModelParamCount;
+}
+
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
jlong dict, jint unigramProbability, jint bigramProbability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@@ -343,7 +453,7 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz,
static const int GET_PROPERTY_RESULT_LENGTH = 100;
char resultChars[GET_PROPERTY_RESULT_LENGTH];
resultChars[0] = '\0';
- dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH);
+ dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
return env->NewStringUTF(resultChars);
}
@@ -364,6 +474,11 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
+ const_cast<char *>("getFormatVersionNative"),
+ const_cast<char *>("(J)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
+ },
+ {
const_cast<char *>("flushNative"),
const_cast<char *>("(JLjava/lang/String;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
@@ -394,6 +509,11 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
+ const_cast<char *>("getUnigramPropertyNative"),
+ const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty)
+ },
+ {
const_cast<char *>("calcNormalizedScoreNative"),
const_cast<char *>("([I[II)F"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
@@ -405,12 +525,12 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("addUnigramWordNative"),
- const_cast<char *>("(J[II)V"),
+ const_cast<char *>("(J[II[IIZZI)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
},
{
const_cast<char *>("addBigramWordsNative"),
- const_cast<char *>("(J[I[II)V"),
+ const_cast<char *>("(J[I[III)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
},
{
@@ -419,6 +539,12 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
},
{
+ const_cast<char *>("addMultipleDictionaryEntriesNative"),
+ const_cast<char *>(
+ "(J[Lcom/android/inputmethod/latin/BinaryDictionary$LanguageModelParam;I)I"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
+ },
+ {
const_cast<char *>("calculateProbabilityNative"),
const_cast<char *>("(JII)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 742e388e4..9a26fe051 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -298,6 +298,7 @@ static inline void prof_out(void) {
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)
+#define NOT_A_TIMESTAMP (-1)
// A special value to mean the first word confidence makes no sense in this case,
// e.g. this is not a multi-word suggestion.
@@ -341,12 +342,21 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
+#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
+ TypeName()
+
+#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
+ TypeName(const TypeName&)
+
+#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
void operator=(const TypeName&)
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ DISALLOW_COPY_CONSTRUCTOR(TypeName); \
+ DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
+
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
- TypeName(); \
+ DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// Used as a return value for character comparison
@@ -392,24 +402,4 @@ typedef enum {
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
-
-// ErrorType is mainly decided by CorrectionType but it is also depending on if
-// the correction has really been performed or not.
-typedef enum {
- // Substitution, omission and transposition
- ET_EDIT_CORRECTION,
- // Proximity error
- ET_PROXIMITY_CORRECTION,
- // Completion
- ET_COMPLETION,
- // New word
- // TODO: Remove.
- // A new word error should be an edit correction error or a proximity correction error.
- ET_NEW_WORD,
- // Treat error as an intentional omission when the CorrectionType is omission and the node can
- // be intentional omission.
- ET_INTENTIONAL_OMISSION,
- // Not treated as an error. Tracked for checking exact match
- ET_NOT_AN_ERROR
-} ErrorType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 49cfdecac..0b2b4a9e8 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -99,7 +99,7 @@ class DicNode {
virtual ~DicNode() {}
// Init for copy
- void initByCopy(const DicNode *dicNode) {
+ void initByCopy(const DicNode *const dicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
@@ -107,25 +107,25 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // Init for root with prevWordNodePos which is used for bigram
- void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
+ // Init for root with prevWordPtNodePos which is used for bigram
+ void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
- mDicNodeState.init(prevWordNodePos);
+ mDicNodeState.init(prevWordPtNodePos);
PROF_NODE_RESET(mProfiler);
}
// Init for root with previous word
- void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
+ void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@@ -138,7 +138,7 @@ class DicNode {
mDicNodeState.mDicNodeStatePrevWord.init(
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
dicNode->mDicNodeProperties.getProbability(),
- dicNode->mDicNodeProperties.getPos(),
+ dicNode->mDicNodeProperties.getPtNodePos(),
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
@@ -148,26 +148,27 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- void initAsPassingChild(DicNode *parentNode) {
+ void initAsPassingChild(DicNode *parentDicNode) {
mIsUsed = true;
- mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
- const int c = parentNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
- mDicNodeState.init(&parentNode->mDicNodeState);
- PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+ mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
+ const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
+ mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
+ mDicNodeState.init(&parentDicNode->mDicNodeState);
+ PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
}
- void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void initAsChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
- isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
+ mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
+ probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+ newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -234,7 +235,7 @@ class DicNode {
}
bool isFirstWord() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@@ -246,29 +247,30 @@ class DicNode {
}
// Used to get bigram probability in DicNodeUtils
- int getPos() const {
- return mDicNodeProperties.getPos();
+ int getPtNodePos() const {
+ return mDicNodeProperties.getPtNodePos();
}
// Used to get bigram probability in DicNodeUtils
- int getPrevWordPos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ int getPrevWordTerminalPtNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
// Used in DicNodeUtils
- int getChildrenPos() const {
- return mDicNodeProperties.getChildrenPos();
+ int getChildrenPtNodeArrayPos() const {
+ return mDicNodeProperties.getChildrenPtNodeArrayPos();
}
int getProbability() const {
return mDicNodeProperties.getProbability();
}
- AK_FORCE_INLINE bool isTerminalWordNode() const {
- const bool isTerminalNodes = mDicNodeProperties.isTerminal();
- const int currentNodeDepth = getNodeCodePointCount();
- const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
- return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
+ AK_FORCE_INLINE bool isTerminalDicNode() const {
+ const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
+ const int currentDicNodeDepth = getNodeCodePointCount();
+ const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
+ return isTerminalPtNode && currentDicNodeDepth > 0
+ && currentDicNodeDepth == terminalDicNodeDepth;
}
bool shouldBeFilteredBySafetyNetForBigram() const {
@@ -374,8 +376,8 @@ class DicNode {
}
// Used to commit input partially
- int getPrevWordNodePos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+ int getPrevWordPtNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
@@ -410,7 +412,7 @@ class DicNode {
// TODO: Remove once touch path is merged into ProximityInfoState
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
int getNodeCodePoint() const {
- const int codePoint = mDicNodeProperties.getNodeCodePoint();
+ const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
const DigraphUtils::DigraphCodePointIndex digraphIndex =
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@@ -423,8 +425,8 @@ class DicNode {
// Utils for cost calculation //
////////////////////////////////
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
- return mDicNodeProperties.getNodeCodePoint()
- == dicNode->mDicNodeProperties.getNodeCodePoint();
+ return mDicNodeProperties.getDicNodeCodePoint()
+ == dicNode->mDicNodeProperties.getDicNodeCodePoint();
}
// TODO: remove
@@ -574,7 +576,8 @@ class DicNode {
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
- const bool doNormalization, const int inputSize, const ErrorType errorType) {
+ const bool doNormalization, const int inputSize,
+ const ErrorTypeUtils::ErrorType errorType) {
if (DEBUG_GEO_FULL) {
LOGI_SHOW_ADD_COST_PROP;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index ec65114c7..5540b6df5 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -22,7 +22,6 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
namespace latinime {
@@ -32,19 +31,20 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *const newRootNode) {
- newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
+ const int prevWordPtNodePos, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *const prevWordLastNode, DicNode *const newRootNode) {
- newRootNode->initAsRootWithPreviousWord(
- prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
+ newRootDicNode->initAsRootWithPreviousWord(
+ prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
}
-/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
- destNode->initByCopy(srcNode);
+/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
+ DicNode *const destDicNode) {
+ destDicNode->initByCopy(srcDicNode);
}
///////////////////////////////////
@@ -52,14 +52,14 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNodeVector *childDicNodes) {
+ DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
if (!dicNode->isLeavingNode()) {
childDicNodes->pushPassingChild(dicNode);
} else {
- dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
+ dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
}
}
@@ -71,11 +71,11 @@ namespace latinime {
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@@ -85,19 +85,19 @@ namespace latinime {
/* static */ int DicNodeUtils::getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap) {
- const int unigramProbability = node->getProbability();
- const int wordPos = node->getPos();
- const int prevWordPos = node->getPrevWordPos();
- if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+ const int unigramProbability = dicNode->getProbability();
+ const int ptNodePos = dicNode->getPtNodePos();
+ const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
+ if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
- wordPos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
+ prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
@@ -109,7 +109,7 @@ namespace latinime {
// TODO: Move to char_utils?
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, int *dest) {
+ const int *const src1, const int16_t length1, int *const dest) {
int actualLength0 = 0;
for (int i = 0; i < length0; ++i) {
if (src0[i] == 0) {
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 3fb351a61..3f1514a52 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -31,20 +31,20 @@ class MultiBigramMap;
class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
- const int16_t length1, int *dest);
+ const int16_t length1, int *const dest);
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordNodePos, DicNode *newRootNode);
+ const int prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNode *prevWordLastNode, DicNode *newRootNode);
- static void initByCopy(DicNode *srcNode, DicNode *destNode);
+ const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
+ static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
static void getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *const multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@@ -53,7 +53,7 @@ class DicNodeUtils {
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const node, MultiBigramMap *multiBigramMap);
+ const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 42addae8d..9364e7751 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -62,14 +62,14 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
- const int probability, const bool isTerminal, const bool hasChildren,
- const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
- const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
+ const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+ const bool hasChildren, const bool isBlacklistedOrNotAWord,
+ const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
- hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
index 9e0f62ceb..c41a7243a 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
@@ -24,15 +24,14 @@
namespace latinime {
/**
- * Node for traversing the lexicon trie.
+ * PtNode information related to the DicNode from the lexicon trie.
*/
-// TODO: Introduce a dictionary node class which has attribute members required to understand the
-// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
- mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
+ : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
+ mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
+ mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
@@ -40,57 +39,57 @@ class DicNodeProperties {
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) {
- mPos = pos;
- mChildrenPos = childrenPos;
- mNodeCodePoint = nodeCodePoint;
+ mPtNodePos = pos;
+ mChildrenPtNodeArrayPos = childrenPos;
+ mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
- mHasChildren = hasChildren;
+ mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
// Init for copy
- void init(const DicNodeProperties *const nodeProp) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = nodeProp->mNodeCodePoint;
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth;
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth;
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
}
// Init as passing child
- void init(const DicNodeProperties *const nodeProp, const int codePoint) {
- mPos = nodeProp->mPos;
- mChildrenPos = nodeProp->mChildrenPos;
- mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
- mProbability = nodeProp->mProbability;
- mIsTerminal = nodeProp->mIsTerminal;
- mHasChildren = nodeProp->mHasChildren;
- mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
- mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
- mLeavingDepth = nodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
+ mPtNodePos = dicNodeProp->mPtNodePos;
+ mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+ mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+ mProbability = dicNodeProp->mProbability;
+ mIsTerminal = dicNodeProp->mIsTerminal;
+ mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+ mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
+ mLeavingDepth = dicNodeProp->mLeavingDepth;
}
- int getPos() const {
- return mPos;
+ int getPtNodePos() const {
+ return mPtNodePos;
}
- int getChildrenPos() const {
- return mChildrenPos;
+ int getChildrenPtNodeArrayPos() const {
+ return mChildrenPtNodeArrayPos;
}
int getProbability() const {
return mProbability;
}
- int getNodeCodePoint() const {
- return mNodeCodePoint;
+ int getDicNodeCodePoint() const {
+ return mDicNodeCodePoint;
}
uint16_t getDepth() const {
@@ -107,7 +106,7 @@ class DicNodeProperties {
}
bool hasChildren() const {
- return mHasChildren || mDepth != mLeavingDepth;
+ return mHasChildrenPtNodes || mDepth != mLeavingDepth;
}
bool isBlacklistedOrNotAWord() const {
@@ -118,12 +117,12 @@ class DicNodeProperties {
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
- int mPos;
- int mChildrenPos;
+ int mPtNodePos;
+ int mChildrenPtNodeArrayPos;
int mProbability;
- int mNodeCodePoint;
+ int mDicNodeCodePoint;
bool mIsTerminal;
- bool mHasChildren;
+ bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index b8986203d..dba57056b 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
+ mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
}
@@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = NOT_A_DICT_POS;
+ mPrevWordPtNodePos = NOT_A_DICT_POS;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = prevWordNodePos;
+ mPrevWordPtNodePos = prevWordNodePos;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = prevWord->mPrevWordCount;
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
- mPrevWordNodePos = prevWord->mPrevWordNodePos;
+ mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
}
@@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
- mPrevWordNodePos = prevWordNodePos;
+ mPrevWordPtNodePos = prevWordNodePos;
int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
if (twoWordsLen >= MAX_WORD_LENGTH) {
@@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
return mPrevWordStart;
}
- int getPrevWordNodePos() const {
- return mPrevWordNodePos;
+ int getPrevWordPtNodePos() const {
+ return mPrevWordPtNodePos;
}
int getPrevWordCodePointAt(const int id) const {
@@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordLength;
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
- int mPrevWordNodePos;
+ int mPrevWordPtNodePos;
int mSecondWordFirstInputIndex;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 3c85d0e9d..74f9eee92 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/digraph_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -31,7 +32,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mExactMatch(true),
+ mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
@@ -47,7 +48,7 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
- mExactMatch = true;
+ mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -59,34 +60,21 @@ class DicNodeStateScoring {
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
- mExactMatch = scoring->mExactMatch;
+ mContainingErrorTypes = scoring->mContainingErrorTypes;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
- const int inputSize, const int totalInputIndex, const ErrorType errorType) {
+ const int inputSize, const int totalInputIndex,
+ const ErrorTypeUtils::ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
- switch (errorType) {
- case ET_EDIT_CORRECTION:
- ++mEditCorrectionCount;
- mExactMatch = false;
- break;
- case ET_PROXIMITY_CORRECTION:
- ++mProximityCorrectionCount;
- mExactMatch = false;
- break;
- case ET_COMPLETION:
- mExactMatch = false;
- break;
- case ET_NEW_WORD:
- mExactMatch = false;
- break;
- case ET_INTENTIONAL_OMISSION:
- mExactMatch = false;
- break;
- case ET_NOT_AN_ERROR:
- break;
+ mContainingErrorTypes = mContainingErrorTypes | errorType;
+ if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
+ ++mEditCorrectionCount;
+ }
+ if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
+ ++mProximityCorrectionCount;
}
}
@@ -182,7 +170,7 @@ class DicNodeStateScoring {
}
bool isExactMatch() const {
- return mExactMatch;
+ return ErrorTypeUtils::isExactMatch(mContainingErrorTypes);
}
private:
@@ -199,7 +187,8 @@ class DicNodeStateScoring {
float mSpatialDistance;
float mLanguageDistance;
float mRawLength;
- bool mExactMatch;
+ // All accumulated error types so far
+ ErrorTypeUtils::ErrorType mContainingErrorTypes;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 71f4ef6ea..2a62b555b 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
+ int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
@@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
- int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
+ int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
@@ -163,7 +163,8 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPos) {
+ if (bigramsIt.getBigramPos() == nextWordPos
+ && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
return mDictionaryStructurePolicy->getProbability(
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
bigramsIt.getProbability());
diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h
index 5205456a8..5f9700486 100644
--- a/native/jni/src/suggest/core/dictionary/bloom_filter.h
+++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h
@@ -50,6 +50,8 @@ class BloomFilter {
}
private:
+ DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
+
// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 59ead1894..e68c0a6d8 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -21,46 +21,39 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
#include "utils/log_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
-Dictionary::Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
+Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ &dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
- mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
+ mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
logDictionaryInfo(env);
}
-Dictionary::~Dictionary() {
- delete mBigramDictionary;
- delete mGestureSuggest;
- delete mTypingSuggest;
- delete mDictionaryStructureWithBufferPolicy;
-}
-
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
int *spaceIndices, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
+ TimeKeeper::setCurrentTime();
int result = 0;
if (suggestOptions->isGesture()) {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence);
if (DEBUG_DICT) {
@@ -70,7 +63,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
} else {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
outWords, frequencies, spaceIndices, outputTypes,
outputAutoCommitFirstWordConfidence);
@@ -83,12 +76,15 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
int *outputTypes) const {
+ TimeKeeper::setCurrentTime();
if (length <= 0) return 0;
- return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
+ return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies,
+ outputTypes);
}
int Dictionary::getProbability(const int *word, int length) const {
- int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
+ TimeKeeper::setCurrentTime();
+ int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;
@@ -98,39 +94,60 @@ int Dictionary::getProbability(const int *word, int length) const {
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
int length1) const {
- return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
+ TimeKeeper::setCurrentTime();
+ return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1);
}
-void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
- mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
+void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
+ const int *const shortcutTargetCodePoints, const int shortcutLength,
+ const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
+ const int timestamp) {
+ TimeKeeper::setCurrentTime();
+ mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
+ shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
+ isBlacklisted, timestamp);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability) {
- mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
- probability);
+ const int length1, const int probability, const int timestamp) {
+ TimeKeeper::setCurrentTime();
+ mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1,
+ probability, timestamp);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
+ TimeKeeper::setCurrentTime();
+ mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1);
}
void Dictionary::flush(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flush(filePath);
+ TimeKeeper::setCurrentTime();
+ mDictionaryStructureWithBufferPolicy.get()->flush(filePath);
}
void Dictionary::flushWithGC(const char *const filePath) {
- mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+ TimeKeeper::setCurrentTime();
+ mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath);
}
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
- return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC);
}
-void Dictionary::getProperty(const char *const query, char *const outResult,
+void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) {
- return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, queryLength, outResult,
+ maxResultLength);
+}
+
+const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
+ const int codePointCount) {
+ TimeKeeper::setCurrentTime();
+ return mDictionaryStructureWithBufferPolicy.get()->getUnigramProperty(
+ codePoints, codePointCount);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 0195d5bf0..b37b4aa18 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,15 +21,20 @@
#include "defines.h"
#include "jni.h"
+#include "suggest/core/dictionary/bigram_dictionary.h"
+#include "suggest/core/dictionary/unigram_property.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/core/suggest_interface.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
-class BigramDictionary;
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
-class SuggestInterface;
class SuggestOptions;
+class UnigramProperty;
class Dictionary {
public:
@@ -53,8 +58,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(JNIEnv *env,
- DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy);
+ Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ &dictionaryStructureWithBufferPolicy);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
@@ -69,10 +74,13 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
- void addUnigramWord(const int *const word, const int length, const int probability);
+ void addUnigramWord(const int *const word, const int length, const int probability,
+ const int *const shortcutTargetCodePoints, const int shortcutLength,
+ const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
+ const int timestamp);
void addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability);
+ const int length1, const int probability, const int timestamp);
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
@@ -83,24 +91,33 @@ class Dictionary {
bool needsToRunGC(const bool mindsBlockByGC);
- void getProperty(const char *const query, char *const outResult,
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
+ const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
- return mDictionaryStructureWithBufferPolicy;
+ return mDictionaryStructureWithBufferPolicy.get();
}
- virtual ~Dictionary();
+ int getFormatVersionNumber() const {
+ return mDictionaryStructureWithBufferPolicy.get()->getHeaderStructurePolicy()
+ ->getFormatVersionNumber();
+ }
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+ typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr;
+ typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr;
+
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
- DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
- const BigramDictionary *const mBigramDictionary;
- const SuggestInterface *const mGestureSuggest;
- const SuggestInterface *const mTypingSuggest;
+ const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ mDictionaryStructureWithBufferPolicy;
+ const BigramDictionaryPtr mBigramDictionary;
+ const SuggestInterfacePtr mGestureSuggest;
+ const SuggestInterfacePtr mTypingSuggest;
void logDictionaryInfo(JNIEnv *const env) const;
};
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index 3271c1bfb..5f9b8f3e2 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -28,11 +28,8 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
{ { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
{ 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
{ 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
-const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
- { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
- { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
- { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
+ { DIGRAPH_TYPE_GERMAN_UMLAUT };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
const DictionaryHeaderStructurePolicy *const headerPolicy,
@@ -50,9 +47,6 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
if (headerPolicy->requiresGermanUmlautProcessing()) {
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
- if (headerPolicy->requiresFrenchLigatureProcessing()) {
- return DIGRAPH_TYPE_FRENCH_LIGATURES;
- }
return DIGRAPH_TYPE_NONE;
}
@@ -86,10 +80,6 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
*digraphs = GERMAN_UMLAUT_DIGRAPHS;
return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
}
- if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) {
- *digraphs = FRENCH_LIGATURES_DIGRAPHS;
- return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
- }
return 0;
}
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h
index 6ae16e390..bec2cd6e2 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.h
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h
@@ -34,7 +34,6 @@ class DigraphUtils {
typedef enum {
DIGRAPH_TYPE_NONE,
DIGRAPH_TYPE_GERMAN_UMLAUT,
- DIGRAPH_TYPE_FRENCH_LIGATURES
} DigraphType;
typedef struct { int first; int second; int compositeGlyph; } digraph_t;
@@ -55,7 +54,6 @@ class DigraphUtils {
const DigraphType digraphType, const int compositeGlyphCodePoint);
static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
- static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
static const DigraphType USED_DIGRAPH_TYPES[];
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
new file mode 100644
index 000000000..0635fef7e
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/error_type_utils.h"
+
+namespace latinime {
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
+ NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
new file mode 100644
index 000000000..ab4a65e48
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ERROR_TYPE_UTILS_H
+#define LATINIME_ERROR_TYPE_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class ErrorTypeUtils {
+ public:
+ // ErrorType is mainly decided by CorrectionType but it is also depending on if
+ // the correction has really been performed or not.
+ typedef uint32_t ErrorType;
+
+ static const ErrorType NOT_AN_ERROR;
+ static const ErrorType MATCH_WITH_CASE_ERROR;
+ static const ErrorType MATCH_WITH_ACCENT_ERROR;
+ static const ErrorType MATCH_WITH_DIGRAPH;
+ // Treat error as an intentional omission when the CorrectionType is omission and the node can
+ // be intentional omission.
+ static const ErrorType INTENTIONAL_OMISSION;
+ // Substitution, omission and transposition
+ static const ErrorType EDIT_CORRECTION;
+ // Proximity error
+ static const ErrorType PROXIMITY_CORRECTION;
+ // Completion
+ static const ErrorType COMPLETION;
+ // New word
+ // TODO: Remove.
+ // A new word error should be an edit correction error or a proximity correction error.
+ static const ErrorType NEW_WORD;
+
+ // TODO: Differentiate errors.
+ static bool isExactMatch(const ErrorType containingErrors) {
+ return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
+ }
+
+ static bool isEditCorrectionError(const ErrorType errorType) {
+ return (errorType & EDIT_CORRECTION) != 0;
+ }
+
+ static bool isProximityCorrectionError(const ErrorType errorType) {
+ return (errorType & PROXIMITY_CORRECTION) != 0;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
+
+ static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
+};
+} // namespace latinime
+#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
index b1d2f4b4d..49d82e69a 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
@@ -30,4 +30,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25;
// Most common previous word contexts currently have 100 bigrams
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
+// Look up the bigram probability for the given word pair from the cached bigram maps.
+// Also caches the bigrams if there is space remaining and they have not been cached already.
+int MultiBigramMap::getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int wordPosition, const int nextWordPosition, const int unigramProbability) {
+ hash_map_compat<int, BigramMap>::const_iterator mapPosition =
+ mBigramMaps.find(wordPosition);
+ if (mapPosition != mBigramMaps.end()) {
+ return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
+ unigramProbability);
+ }
+ if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
+ addBigramsForWordPosition(structurePolicy, wordPosition);
+ return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
+ nextWordPosition, unigramProbability);
+ }
+ return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
+ nextWordPosition, unigramProbability);
+}
+
+void MultiBigramMap::BigramMap::init(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
+ continue;
+ }
+ mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
+ mBloomFilter.setInFilter(bigramsIt.getBigramPos());
+ }
+}
+
+int MultiBigramMap::BigramMap::getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int nextWordPosition, const int unigramProbability) const {
+ int bigramProbability = NOT_A_PROBABILITY;
+ if (mBloomFilter.isInFilter(nextWordPosition)) {
+ const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
+ mBigramMap.find(nextWordPosition);
+ if (bigramProbabilityIt != mBigramMap.end()) {
+ bigramProbability = bigramProbabilityIt->second;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+}
+
+void MultiBigramMap::addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
+ mBigramMaps[position].init(structurePolicy, position);
+}
+
+int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
+ const int nextWordPosition, const int unigramProbability) {
+ int bigramProbability = NOT_A_PROBABILITY;
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPosition) {
+ bigramProbability = bigramsIt.getProbability();
+ break;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index 4633c07b0..421b2681c 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -38,21 +38,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability) {
- hash_map_compat<int, BigramMap>::const_iterator mapPosition =
- mBigramMaps.find(wordPosition);
- if (mapPosition != mBigramMaps.end()) {
- return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
- unigramProbability);
- }
- if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(structurePolicy, wordPosition);
- return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
- nextWordPosition, unigramProbability);
- }
- return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
- nextWordPosition, unigramProbability);
- }
+ const int wordPosition, const int nextWordPosition, const int unigramProbability);
void clear() {
mBigramMaps.clear();
@@ -67,33 +53,11 @@ class MultiBigramMap {
~BigramMap() {}
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nodePos) {
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
- mBloomFilter.setInFilter(bigramsIt.getBigramPos());
- }
- }
+ const int nodePos);
- AK_FORCE_INLINE int getBigramProbability(
+ int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nextWordPosition, const int unigramProbability) const {
- int bigramProbability = NOT_A_PROBABILITY;
- if (mBloomFilter.isInFilter(nextWordPosition)) {
- const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
- mBigramMap.find(nextWordPosition);
- if (bigramProbabilityIt != mBigramMap.end()) {
- bigramProbability = bigramProbabilityIt->second;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
- }
+ const int nextWordPosition, const int unigramProbability) const;
private:
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
@@ -103,27 +67,12 @@ class MultiBigramMap {
BloomFilter mBloomFilter;
};
- AK_FORCE_INLINE void addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
- mBigramMaps[position].init(structurePolicy, position);
- }
+ void addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
- AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
+ int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability) {
- int bigramProbability = NOT_A_PROBABILITY;
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPosition) {
- bigramProbability = bigramsIt.getProbability();
- break;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
- }
+ const int nextWordPosition, const int unigramProbability);
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
hash_map_compat<int, BigramMap> mBigramMaps;
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.cpp b/native/jni/src/suggest/core/dictionary/unigram_property.cpp
new file mode 100644
index 000000000..16bbb69d8
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/unigram_property.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/core/dictionary/unigram_property.h"
+
+namespace latinime {
+
+void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
+ jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
+ jobject outShortcutTargets, jobject outShortcutProbabilities) const {
+ env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints);
+ jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
+ env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
+ env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
+ int historicalInfo[] = {mTimestamp, mLevel, mCount};
+ env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
+ historicalInfo);
+
+ jclass integerClass = env->FindClass("java/lang/Integer");
+ jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
+ jclass arrayListClass = env->FindClass("java/util/ArrayList");
+ jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
+ const int shortcutTargetCount = mShortcutTargets.size();
+ for (int i = 0; i < shortcutTargetCount; ++i) {
+ jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size());
+ env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
+ mShortcutTargets[i].size(), &mShortcutTargets[i][0]);
+ env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
+ env->DeleteLocalRef(shortcutTargetCodePointArray);
+ jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
+ mShortcutProbabilities[i]);
+ env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability);
+ env->DeleteLocalRef(integerProbability);
+ }
+ env->DeleteLocalRef(integerClass);
+ env->DeleteLocalRef(arrayListClass);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.h b/native/jni/src/suggest/core/dictionary/unigram_property.h
new file mode 100644
index 000000000..c4ebb86ab
--- /dev/null
+++ b/native/jni/src/suggest/core/dictionary/unigram_property.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_UNIGRAM_PROPERTY_H
+#define LATINIME_UNIGRAM_PROPERTY_H
+
+#include <cstring>
+#include <vector>
+
+#include "defines.h"
+#include "jni.h"
+
+namespace latinime {
+
+// This class is used for returning information belonging to a unigram to java side.
+class UnigramProperty {
+ public:
+ // Invalid unigram.
+ UnigramProperty()
+ : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false),
+ mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
+ mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {}
+
+ UnigramProperty(const UnigramProperty &unigramProperty)
+ : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount),
+ mIsNotAWord(unigramProperty.mIsNotAWord),
+ mIsBlacklisted(unigramProperty.mIsBlacklisted),
+ mHasBigrams(unigramProperty.mHasBigrams),
+ mHasShortcuts(unigramProperty.mHasShortcuts),
+ mProbability(unigramProperty.mProbability),
+ mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel),
+ mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets),
+ mShortcutProbabilities(unigramProperty.mShortcutProbabilities) {
+ memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints));
+ }
+
+ UnigramProperty(const int *const codePoints, const int codePointCount,
+ const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
+ const bool hasShortcuts, const int probability, const int timestamp,
+ const int level, const int count,
+ const std::vector<std::vector<int> > *const shortcutTargets,
+ const std::vector<int> *const shortcutProbabilities)
+ : mCodePoints(), mCodePointCount(codePointCount),
+ mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams),
+ mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp),
+ mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets),
+ mShortcutProbabilities(*shortcutProbabilities) {
+ memcpy(mCodePoints, codePoints, sizeof(mCodePoints));
+ }
+
+ void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
+ jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
+ jobject outShortcutProbabilities) const;
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
+
+ int mCodePoints[MAX_WORD_LENGTH];
+ int mCodePointCount;
+ bool mIsNotAWord;
+ bool mIsBlacklisted;
+ bool mHasBigrams;
+ bool mHasShortcuts;
+ int mProbability;
+ // Historical information
+ int mTimestamp;
+ int mLevel;
+ int mCount;
+ // Shortcut
+ std::vector<std::vector<int> > mShortcutTargets;
+ std::vector<int> mShortcutProbabilities;
+};
+} // namespace latinime
+#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index e64476d82..ee8e59ef9 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
&& sweetSpotCenterYs && sweetSpotRadii),
mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
/* proximityCharsLength */]),
- mCodeToKeyMap() {
+ mLowerCodePointToKeyMap() {
/* Let's check the input array length here to make sure */
const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
@@ -147,7 +147,14 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
- return mKeyIndexToCodePointG[keyIndex];
+ return mKeyIndexToLowerCodePointG[keyIndex];
+}
+
+int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
+ if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
+ return NOT_A_CODE_POINT;
+ }
+ return mKeyIndexToOriginalCodePoint[keyIndex];
}
void ProximityInfo::initializeG() {
@@ -164,8 +171,9 @@ void ProximityInfo::initializeG() {
const float gapY = sweetSpotCenterY - mCenterYsG[i];
mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale);
}
- mCodeToKeyMap[lowerCode] = i;
- mKeyIndexToCodePointG[i] = lowerCode;
+ mLowerCodePointToKeyMap[lowerCode] = i;
+ mKeyIndexToOriginalCodePoint[i] = code;
+ mKeyIndexToLowerCodePointG[i] = lowerCode;
}
for (int i = 0; i < KEY_COUNT; i++) {
mKeyKeyDistancesG[i][i] = 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h
index f25949001..a91b9d674 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.h
+++ b/native/jni/src/suggest/core/layout/proximity_info.h
@@ -39,6 +39,7 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const bool isGeometric) const;
int getCodePointOf(const int keyIndex) const;
+ int getOriginalCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@@ -76,11 +77,11 @@ class ProximityInfo {
ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
- KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
+ KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
}
AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
- return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
}
AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
@@ -117,9 +118,9 @@ class ProximityInfo {
// Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
- hash_map_compat<int, int> mCodeToKeyMap;
-
- int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ hash_map_compat<int, int> mLowerCodePointToKeyMap;
+ int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index fbabd92f2..bb4b41714 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -30,6 +30,12 @@
namespace latinime {
+int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
+ const int primaryCodePoint = getPrimaryCodePointAt(index);
+ const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint);
+ return mProximityInfo->getOriginalCodePointOf(keyIndex);
+}
+
// TODO: Remove the dependency of "isGeometric"
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index c94060fa9..e941e43d8 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -65,6 +65,8 @@ class ProximityInfoState {
return getProximityCodePointsAt(index)[0];
}
+ int getPrimaryOriginalCodePointAt(const int index) const;
+
inline bool sameAsTyped(const int *word, int length) const {
if (length != mSampledInputSize) {
return false;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index 5492c6070..b76b13971 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -29,12 +29,10 @@ class DictionaryHeaderStructurePolicy {
public:
virtual ~DictionaryHeaderStructurePolicy() {}
- virtual bool supportsDynamicUpdate() const = 0;
+ virtual int getFormatVersionNumber() const = 0;
virtual bool requiresGermanUmlautProcessing() const = 0;
- virtual bool requiresFrenchLigatureProcessing() const = 0;
-
virtual float getMultiWordCostMultiplier() const = 0;
virtual int getLastDecayedTime() const = 0;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 41f82049f..c74a4ebbe 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -18,6 +18,8 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
+#include "suggest/core/dictionary/unigram_property.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@@ -28,23 +30,25 @@ class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
- * This class abstracts structure of dictionaries.
+ * This class abstracts the structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
class DictionaryStructureWithBufferPolicy {
public:
+ typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
+
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0;
- virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+ virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
- virtual int getTerminalNodePositionOfWord(const int *const inWord,
+ virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getProbability(const int unigramProbability,
@@ -64,11 +68,13 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was success or not.
virtual bool addUnigramWord(const int *const word, const int length,
- const int probability) = 0;
+ const int probability, const int *const shortcutTargetCodePoints,
+ const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
+ const bool isBlacklisted,const int timestamp) = 0;
// Returns whether the update was success or not.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability) = 0;
+ const int length1, const int probability, const int timestamp) = 0;
// Returns whether the update was success or not.
virtual bool removeBigramWords(const int *const word0, const int length0,
@@ -82,9 +88,13 @@ class DictionaryStructureWithBufferPolicy {
// Currently, this method is used only for testing. You may want to consider creating new
// dedicated method instead of this if you want to use this in the production.
- virtual void getProperty(const char *const query, char *const outResult,
+ virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) = 0;
+ // Used for testing.
+ virtual const UnigramProperty getUnigramProperty(const int *const codePonts,
+ const int codePointCount) const = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index 0c4016893..c202b81fe 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -20,6 +20,7 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
@@ -82,8 +83,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
traverseSession, parentDicNode, dicNode, &inputStateG);
const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
traverseSession, parentDicNode, dicNode, multiBigramMap);
- const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession,
- parentDicNode, dicNode);
+ const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
+ traverseSession, parentDicNode, dicNode);
profile(correctionType, dicNode);
if (inputStateG.mNeedsToUpdateInputStateG) {
dicNode->updateInputIndexG(&inputStateG);
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index 2d49e98a6..bd6b3cf41 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -18,6 +18,7 @@
#define LATINIME_WEIGHTING_H
#include "defines.h"
+#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -84,7 +85,7 @@ class Weighting {
virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
- virtual ErrorType getErrorType(const CorrectionType correctionType,
+ virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 50f2bbd8d..5070491f4 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
- mPrevWordPos = NOT_A_DICT_POS;
+ mPrevWordPtNodePos = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPos == NOT_A_DICT_POS) {
+ if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+ mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index e0b1c67d9..6e4dda44d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -59,7 +59,7 @@ class DicTraverseSession {
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
- : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
+ : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
@@ -86,11 +86,9 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
- int getPrevWordPos() const { return mPrevWordPos; }
+ int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
// TODO: REMOVE
- void setPrevWordPos(int pos) { mPrevWordPos = pos; }
- // TODO: Use proper parameter when changed
- int getDicRootPos() const { return 0; }
+ void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@@ -119,26 +117,13 @@ class DicTraverseSession {
return true;
}
- void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
- for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
- if (!mProximityInfoStates[i].isUsed()) {
- continue;
- }
- const int pointerId = node->getInputIndex(i);
- const std::vector<int> *const searchKeyVector =
- mProximityInfoStates[i].getSearchKeyVector(pointerId);
- outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
- searchKeyVector->end());
- }
- }
-
- ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
+ ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
ProximityType proximityType = UNRELATED_CHAR;
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
- const int pointerId = node->getInputIndex(i);
+ const int pointerId = dicNode->getInputIndex(i);
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
// TODO: Make this more generic
@@ -192,7 +177,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
- int mPrevWordPos;
+ int mPrevWordPtNodePos;
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index 73ccebc88..f84c84181 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Continue suggestion after partial commit.
DicNode *topDicNode =
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
- traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
+ traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
traverseSession->getDicTraverseCache()->continueSearch();
traverseSession->setPartiallyCommited();
}
@@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
- traverseSession->getPrevWordPos(), &rootNode);
+ traverseSession->getPrevWordPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}
@@ -231,12 +231,15 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
+ const int shortcutBaseScore = SCORING->doesAutoCorrectValidWord() ?
+ SCORING->calculateFinalScore(compoundDistance, traverseSession->getInputSize(),
+ true /* forceCommit */) : finalScore;
const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
- outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes,
+ outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes,
sameAsTyped);
const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
traverseSession->getProximityInfoState(0));
@@ -421,15 +424,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
break;
case UNRELATED_CHAR:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
default:
- // Just drop this node and do nothing.
+ // Just drop this dicNode and do nothing.
break;
}
}
- // Push the node for look-ahead correction
+ // Push the dicNode for look-ahead correction
if (allowsErrorCorrections && canDoLookAheadCorrection) {
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
}
@@ -442,7 +445,7 @@ void Suggest::processTerminalDicNode(
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
return;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return;
}
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
@@ -463,7 +466,7 @@ void Suggest::processTerminalDicNode(
/**
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
- * (by the space omission error correction) search path if input dicNode is on a terminal node.
+ * (by the space omission error correction) search path if input dicNode is on a terminal.
*/
void Suggest::processExpandedDicNode(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
@@ -505,7 +508,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode);
}
-// Process the node codepoint as a digraph. This means that composite glyphs like the German
+// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@@ -518,7 +521,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
/**
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
* matches for all possible next letters. Note that just skipping the current letter without any
- * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
+ * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
* the possible *next* letters after the omission to better limit search to plausible omissions.
* Note that apostrophes are handled as omissions.
*/
@@ -605,7 +608,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
}
/**
- * Weight child node by aligning it to the key
+ * Weight child dicNode by aligning it to the key
*/
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
const int inputSize = traverseSession->getInputSize();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index 1926b9831..7d0d09631 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,7 +16,6 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -38,7 +37,6 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
// Mask for attribute probability, stored on 4 bits inside the flags byte.
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
@@ -79,11 +77,6 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
break;
}
- if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
- return NOT_A_DICT_POS;
- } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
- return origin;
- }
if (isOffsetNegative(flags)) {
return origin - offset;
} else {
@@ -91,92 +84,4 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
}
}
-/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
- BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
- const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
- int readingPos = entryPos;
- if (usesAdditionalBuffer) {
- readingPos -= buffer->getOriginalBufferSize();
- }
- BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
- buffer->getBuffer(usesAdditionalBuffer), &readingPos);
- if (hasNext) {
- bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
- } else {
- bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
- }
- int writingPos = entryPos;
- return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
-}
-
-/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
- BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
- const bool hasNext, int *const writingPos) {
- BigramFlags flags;
- if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
- return false;
- }
- return writeBigramEntry(buffer, flags, targetPos, writingPos);
-}
-
-/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
- BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
- const int targetPtNodePos, int *const writingPos) {
- const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
- const BigramFlags flagsToWrite = (offset < 0) ?
- (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
- if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
- return false;
- }
- const uint32_t absOffest = abs(offset);
- const int bigramTargetFieldSize = attributeAddressSize(flags);
- return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
- writingPos);
-}
-
-// Returns true if the bigram entry is valid and put entry flags into out*.
-/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
- const int targetPtNodePos, const int probability, const bool hasNext,
- BigramFlags *const outBigramFlags) {
- BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
- if (hasNext) {
- flags |= FLAG_ATTRIBUTE_HAS_NEXT;
- }
- const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
- if (offset < 0) {
- flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
- }
- const uint32_t absOffest = abs(offset);
- if ((absOffest >> 24) != 0) {
- // Offset is too large.
- return false;
- } else if ((absOffest >> 16) != 0) {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- } else if ((absOffest >> 8) != 0) {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
- } else {
- flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
- }
- // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
- // writing.
- // TODO: Remove following 2 lines and optimize memory space.
- flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
- *outBigramFlags = flags;
- return true;
-}
-
-/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
- const int entryPos) {
- if (targetPtNodePos == NOT_A_DICT_POS) {
- return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
- } else {
- const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
- if (offset == 0) {
- return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
- } else {
- return offset;
- }
- }
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
index eabe4e099..7e1038300 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -45,34 +45,6 @@ public:
// Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
- // Returns the size of the bigram position field that is stored in bigram flags.
- static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
- return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
- /* Note: this is a value-dependant optimization of what may probably be
- more readably written this way:
- switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
- case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
- default: return 0;
- }
- */
- }
-
- static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
- const bool hasNext, const int entryPos);
-
- static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
- const int probability) {
- return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
- }
-
- static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
- const int targetPos, const int probability, const bool hasNext, int *const writingPos);
-
- static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
- const int targetOffset, int *const writingPos);
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@@ -83,11 +55,6 @@ private:
static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
- static const int ATTRIBUTE_ADDRESS_SHIFT;
-
- // Returns true if the bigram entry is valid and put entry flags into out*.
- static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
- const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
@@ -95,8 +62,6 @@ private:
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
-
- static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
deleted file mode 100644
index b1170e251..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
-const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
-
-void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *bigramEntryPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int originalBigramPos;
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
- &originalBigramPos, bigramEntryPos);
- if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
- *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
- if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
- // This bigram is too weak to output.
- *outBigramPos = NOT_A_DICT_POS;
- } else {
- *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- }
- if (usesAdditionalBuffer) {
- *bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
-}
-
-void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos += mBuffer->getOriginalBufferSize();
- }
-}
-
-bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
- int *const fromPos, int *const toPos, int *const outBigramsCount) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
- if (usesAdditionalBuffer) {
- *fromPos -= mBuffer->getOriginalBufferSize();
- }
- *outBigramsCount = 0;
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- int lastWrittenEntryPos = NOT_A_DICT_POS;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- // The buffer address can be changed after calling buffer writing methods.
- int originalBigramPos;
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
- fromPos);
- if (originalBigramPos == NOT_A_DICT_POS) {
- // skip invalid bigram entry.
- continue;
- }
- if (usesAdditionalBuffer) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- if (bigramPos == NOT_A_DICT_POS) {
- // Target PtNode has been invalidated.
- continue;
- }
- lastWrittenEntryPos = *toPos;
- if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
- BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
- return false;
- }
- (*outBigramsCount)++;
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- // Makes the last entry the terminal of the list. Updates the flags.
- if (lastWrittenEntryPos != NOT_A_DICT_POS) {
- if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
- lastWrittenEntryPos)) {
- return false;
- }
- }
- if (usesAdditionalBuffer) {
- *fromPos += mBuffer->getOriginalBufferSize();
- }
- return true;
-}
-
-// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
-// has been deleted or is not a valid terminal.
-bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
- int *const bigramListPos, int *const outValidBigramEntryCount) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int bigramEntryPos = *bigramListPos;
- int originalBigramPos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
- bigramListPos);
- if (usesAdditionalBuffer) {
- bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
- if (originalBigramPos == NOT_A_DICT_POS) {
- // This entry has already been removed.
- continue;
- }
- if (usesAdditionalBuffer) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- const int bigramTargetNodePos =
- followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
- if (nodeReader.isDeleted() || !nodeReader.isTerminal()
- || bigramTargetNodePos == NOT_A_DICT_POS) {
- // The target is no longer valid terminal. Invalidate the current bigram entry.
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
- return false;
- }
- continue;
- }
- bool isRemoved = false;
- if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
- &isRemoved)) {
- return false;
- }
- if (!isRemoved) {
- (*outValidBigramEntryCount) += 1;
- }
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- return true;
-}
-
-// Updates bigram target PtNode positions in the list after the placing step in GC.
-bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
- const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
- ptNodePositionRelocationMap, int *const outBigramEntryCount) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int bigramEntryPos = *bigramListPos;
- if (usesAdditionalBuffer) {
- bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
- int bigramTargetPtNodePos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
- bigramListPos);
- if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
- continue;
- }
- if (usesAdditionalBuffer) {
- bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
- }
-
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
- ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
- if (it != ptNodePositionRelocationMap->end()) {
- bigramTargetPtNodePos = it->second;
- } else {
- bigramTargetPtNodePos = NOT_A_DICT_POS;
- }
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- bigramTargetPtNodePos, &bigramEntryPos)) {
- return false;
- }
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- (*outBigramEntryCount) = bigramEntryCount;
- return true;
-}
-
-bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
- const int probability, int *const bigramListPos, bool *const outAddedNewBigram) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
- if (usesAdditionalBuffer) {
- *bigramListPos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int entryPos = *bigramListPos;
- if (usesAdditionalBuffer) {
- entryPos += mBuffer->getOriginalBufferSize();
- }
- int originalBigramPos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
- bigramListPos);
- if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
- // Update this bigram entry.
- *outAddedNewBigram = false;
- const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
- bigramFlags);
- const int probabilityToWrite = mIsDecayingDict ?
- ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
- probability) : probability;
- const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
- probabilityToWrite);
- return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
- originalBigramPos, &entryPos);
- }
- if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
- continue;
- }
- // The current last entry is found.
- // First, update the flags of the last entry.
- if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
- *outAddedNewBigram = false;
- return false;
- }
- if (usesAdditionalBuffer) {
- *bigramListPos += mBuffer->getOriginalBufferSize();
- }
- // Then, add a new entry after the last entry.
- *outAddedNewBigram = true;
- return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- // We return directly from the while loop.
- ASSERT(false);
- return false;
-}
-
-bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
- int *const writingPos) {
- // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
- const int probabilityToWrite = mIsDecayingDict ?
- ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
- probability;
- return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
- probabilityToWrite, false /* hasNext */, writingPos);
-}
-
-bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
- int pos = bigramListPos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::BigramFlags bigramFlags;
- int bigramEntryCount = 0;
- do {
- if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
- bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
- ASSERT(false);
- return false;
- }
- int bigramEntryPos = pos;
- int originalBigramPos;
- // The buffer address can be changed after calling buffer writing methods.
- BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
- if (usesAdditionalBuffer) {
- bigramEntryPos += mBuffer->getOriginalBufferSize();
- }
- if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
- originalBigramPos += mBuffer->getOriginalBufferSize();
- }
- const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
- if (bigramPos != bigramTargetPos) {
- continue;
- }
- // Target entry is found. Write an invalid target position to mark the bigram invalid.
- return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
- } while(BigramListReadWriteUtils::hasNext(bigramFlags));
- return false;
-}
-
-int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
- const int originalBigramPos) const {
- if (originalBigramPos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- int currentPos = originalBigramPos;
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
- int bigramLinkCount = 0;
- while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
- currentPos = nodeReader.getBigramLinkedNodePos();
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
- bigramLinkCount++;
- if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
- AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
- ASSERT(false);
- return NOT_A_DICT_POS;
- }
- }
- return currentPos;
-}
-
-bool DynamicBigramListPolicy::updateProbabilityForDecay(
- const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
- int *const bigramEntryPos, bool *const outRemoved) const {
- *outRemoved = false;
- if (mIsDecayingDict) {
- // Update bigram probability for decaying.
- const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
- if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
- // Write new probability.
- const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
- BigramListReadWriteUtils::setProbabilityInFlags(
- bigramFlags, newProbability);
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags,
- targetPtNodePos, bigramEntryPos)) {
- return false;
- }
- } else {
- // Remove current bigram entry.
- *outRemoved = true;
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) {
- return false;
- }
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
deleted file mode 100644
index 0504b59d5..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
-#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryHeaderStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This is a dynamic version of BigramListPolicy and supports an additional buffer.
- */
-class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
- public:
- DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
- BufferWithExtendableBuffer *const buffer,
- const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
- const bool isDecayingDict)
- : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
- mIsDecayingDict(isDecayingDict) {}
-
- ~DynamicBigramListPolicy() {}
-
- void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
- int *const bigramEntryPos) const;
-
- void skipAllBigrams(int *const bigramListPos) const;
-
- // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
- // bufferToWrite and advance these positions after bigram lists. This method skips invalid
- // bigram entries and write the valid bigram entry count to outBigramsCount.
- bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
- int *const toPos, int *const outBigramsCount) const;
-
- bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos,
- int *const outBigramEntryCount);
-
- bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
- const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
- ptNodePositionRelocationMap, int *const outValidBigramEntryCount);
-
- bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
- int *const bigramListPos, bool *const outAddedNewBigram);
-
- bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
- int *const writingPos);
-
- // Return whether or not targetBigramPos is found.
- bool removeBigram(const int bigramListPos, const int bigramTargetPos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
-
- static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
- static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
-
- const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
- BufferWithExtendableBuffer *const mBuffer;
- const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
- const bool mIsDecayingDict;
-
- // Follow bigram link and return the position of bigram target PtNode that is currently valid.
- int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
-
- bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
- const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
-};
-} // namespace latinime
-#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
new file mode 100644
index 000000000..cd2243025
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const {
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
+ if (outBigramPos) {
+ // Lookup target PtNode position.
+ *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ }
+ if (outProbability) {
+ if (bigramEntry.hasHistoricalInfo()) {
+ *outProbability =
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
+ } else {
+ *outProbability = bigramEntry.getProbability();
+ }
+ }
+ if (outHasNext) {
+ *outHasNext = bigramEntry.hasNext();
+ }
+}
+
+bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = false;
+ }
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Updating PtNode doesn't have a bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
+ newProbability, timestamp);
+ // Write an entry.
+ const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ return true;
+ }
+
+ const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
+ if (entryPosToUpdate != NOT_A_DICT_POS) {
+ // Overwrite existing entry.
+ const BigramEntry originalBigramEntry =
+ mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
+ // Reuse invalid entry.
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ }
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &updatedBigramEntry, newProbability, timestamp);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
+ }
+
+ // Add new entry to the bigram list.
+ // Create new bigram list.
+ if (!mBigramDictContent->createNewBigramList(terminalId)) {
+ return false;
+ }
+ // Write new entry at a head position of the bigram list.
+ int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
+ &newBigramEntry, newProbability, timestamp);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
+ return false;
+ }
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
+ // Append existing entries by copying.
+ return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
+}
+
+bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return false;
+ }
+ const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
+ if (entryPosToUpdate == NOT_A_DICT_POS) {
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
+ // Bigram entry doesn't exist.
+ return false;
+ }
+ // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
+}
+
+bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return true;
+ }
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!bigramEntry.isValid()) {
+ continue;
+ }
+ const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ // Invalidate bigram entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ } else if (bigramEntry.hasHistoricalInfo()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ bigramEntry.getHistoricalInfo());
+ if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
+ const BigramEntry updatedBigramEntry =
+ bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ *outBigramCount += 1;
+ } else {
+ // Remove entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
+ return false;
+ }
+ }
+ } else {
+ *outBigramCount += 1;
+ }
+ }
+ return true;
+}
+
+int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return 0;
+ }
+ int bigramCount = 0;
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.isValid()) {
+ bigramCount++;
+ }
+ }
+ return bigramCount;
+}
+
+int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
+ const int bigramListPos) const {
+ bool hasNext = true;
+ int invalidEntryPos = NOT_A_DICT_POS;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
+ // Entry with same target is found.
+ return entryPos;
+ } else if (!bigramEntry.isValid()) {
+ // Invalid entry that can be reused is found.
+ invalidEntryPos = entryPos;
+ }
+ }
+ return invalidEntryPos;
+}
+
+const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
+ const BigramEntry *const originalBigramEntry, const int newProbability,
+ const int timestamp) const {
+ // TODO: Consolidate historical info and probability.
+ if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
+ const HistoricalInfo updatedHistoricalInfo =
+ ForgettingCurveUtils::createUpdatedHistoricalInfo(
+ originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
+ return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
+ } else {
+ return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
new file mode 100644
index 000000000..5b6c5a173
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
+#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
+
+namespace latinime {
+
+class BigramDictContent;
+class HeaderPolicy;
+class TerminalPositionLookupTable;
+
+class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable,
+ const HeaderPolicy *const headerPolicy)
+ : mBigramDictContent(bigramDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable),
+ mHeaderPolicy(headerPolicy) {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const;
+
+ void skipAllBigrams(int *const pos) const {
+ // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+ }
+
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
+ const int timestamp, bool *const outAddedNewEntry);
+
+ bool removeEntry(const int terminalId, const int targetTerminalId);
+
+ bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount);
+
+ int getBigramEntryConut(const int terminalId);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
+
+ int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
+
+ const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
+ const int newProbability, const int timestamp) const;
+
+ BigramDictContent *const mBigramDictContent;
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+ const HeaderPolicy *const mHeaderPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
deleted file mode 100644
index ff80dd2f6..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
- const int size, const bool isUpdatable) {
- // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
- // impl classes of DictionaryStructureWithBufferPolicy.
- const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
- isUpdatable);
- if (!mmapedBuffer) {
- return 0;
- }
- switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(),
- mmapedBuffer->getBufferSize())) {
- case FormatUtils::VERSION_2:
- return new PatriciaTriePolicy(mmapedBuffer);
- case FormatUtils::VERSION_3:
- return new DynamicPatriciaTriePolicy(mmapedBuffer);
- default:
- AKLOGE("DICT: dictionary format is unknown, bad magic number");
- delete mmapedBuffer;
- ASSERT(false);
- return 0;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
deleted file mode 100644
index 5724c5d88..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-bool DynamicPatriciaTrieGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
- // children.
- bool isUselessPtNode = !node->isTerminal();
- if (node->isTerminal() && mIsDecayingDict) {
- const int newProbability =
- ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
- mHeaderPolicy);
- int writingPos = node->getProbabilityFieldPos();
- // Update probability.
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
- mBuffer, newProbability, &writingPos)) {
- return false;
- }
- if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
- isUselessPtNode = true;
- }
- }
- if (mChildrenValue > 0) {
- isUselessPtNode = false;
- } else if (node->isTerminal()) {
- // Remove children as all children are useless.
- int writingPos = node->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
- mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
- return false;
- }
- }
- if (isUselessPtNode) {
- // Current PtNode is no longer needed. Mark it as deleted.
- if (!mWritingHelper->markNodeAsDeleted(node)) {
- return false;
- }
- } else {
- mValueStack.back() += 1;
- if (node->isTerminal()) {
- mValidUnigramCount += 1;
- }
- }
- return true;
-}
-
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (!node->isDeleted()) {
- int pos = node->getBigramsPos();
- if (pos != NOT_A_DICT_POS) {
- int bigramEntryCount = 0;
- if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
- &bigramEntryCount)) {
- return false;
- }
- mValidBigramEntryCount += bigramEntryCount;
- }
- }
- return true;
-}
-
-// Writes dummy PtNode array size when the head of PtNode array is read.
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onDescend(const int ptNodeArrayPos) {
- mValidPtNodeCount = 0;
- int writingPos = mBufferToWrite->getTailPosition();
- mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
- DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
- ptNodeArrayPos, writingPos));
- // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
- // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
- mPtNodeArraySizeFieldPos = writingPos;
- return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
- mBufferToWrite, 0 /* arraySize */, &writingPos);
-}
-
-// Write PtNode array terminal and actual PtNode array size.
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onReadingPtNodeArrayTail() {
- int writingPos = mBufferToWrite->getTailPosition();
- // Write PtNode array terminal.
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
- mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Write actual PtNode array size.
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
- mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
- return false;
- }
- return true;
-}
-
-// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (node->isDeleted()) {
- // Current PtNode is not written in new buffer because it has been deleted.
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), NOT_A_DICT_POS));
- return true;
- }
- int writingPos = mBufferToWrite->getTailPosition();
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
- node->getHeadPos(), writingPos));
- mValidPtNodeCount++;
- // Writes current PtNode.
- return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
- node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
- node->getProbability(), &writingPos);
-}
-
-bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- // Updates parent position.
- int parentPos = node->getParentPos();
- if (parentPos != NOT_A_DICT_POS) {
- DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
- if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
- parentPos = it->second;
- }
- }
- int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- // Write updated parent offset.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
- parentPos, node->getHeadPos(), &writingPos)) {
- return false;
- }
-
- // Updates children position.
- int childrenPos = node->getChildrenPos();
- if (childrenPos != NOT_A_DICT_POS) {
- DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
- mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
- if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
- childrenPos = it->second;
- }
- }
- writingPos = node->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
- childrenPos, &writingPos)) {
- return false;
- }
-
- // Updates bigram target PtNode positions in the bigram list.
- int bigramsPos = node->getBigramsPos();
- if (bigramsPos != NOT_A_DICT_POS) {
- int bigramEntryCount;
- if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
- &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
- return false;
- }
- mBigramCount += bigramEntryCount;
- }
- if (node->isTerminal()) {
- mUnigramCount++;
- }
-
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
deleted file mode 100644
index 2fa3111d3..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
- ptNodePos, mBuffer->getTailPosition());
- ASSERT(false);
- invalidatePtNodeInfo();
- return;
- }
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- int pos = ptNodePos;
- mHeadPos = ptNodePos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPosOffset =
- DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
- &pos);
- mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
- if (outCodePoints != 0) {
- mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
- } else {
- mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
- }
- if (isTerminal()) {
- mProbabilityFieldPos = pos;
- if (usesAdditionalBuffer) {
- mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
- }
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
- } else {
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- }
- mChildrenPosFieldPos = pos;
- if (usesAdditionalBuffer) {
- mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
- }
- mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, &pos);
- if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
- mChildrenPos += mBuffer->getOriginalBufferSize();
- }
- if (mSiblingPos == NOT_A_DICT_POS) {
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- mBigramLinkedNodePos = mChildrenPos;
- } else {
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- }
- }
- if (usesAdditionalBuffer) {
- pos += mBuffer->getOriginalBufferSize();
- }
- if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
- mShortcutPos = pos;
- mShortcutsPolicy->skipAllShortcuts(&pos);
- } else {
- mShortcutPos = NOT_A_DICT_POS;
- }
- if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
- mBigramPos = pos;
- mBigramsPolicy->skipAllBigrams(&pos);
- } else {
- mBigramPos = NOT_A_DICT_POS;
- }
- // Update siblingPos if needed.
- if (mSiblingPos == NOT_A_DICT_POS) {
- // Sibling position is the tail position of current node.
- mSiblingPos = pos;
- }
- // Read destination node if the read node is a moved node.
- if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
- // The destination position is stored at the same place as the parent position.
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
- outCodePoints);
- }
-}
-
-void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
- mHeadPos = NOT_A_DICT_POS;
- mFlags = 0;
- mParentPos = NOT_A_DICT_POS;
- mCodePointCount = 0;
- mProbabilityFieldPos = NOT_A_DICT_POS;
- mProbability = NOT_A_PROBABILITY;
- mChildrenPosFieldPos = NOT_A_DICT_POS;
- mChildrenPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- mShortcutPos = NOT_A_DICT_POS;
- mBigramPos = NOT_A_DICT_POS;
- mSiblingPos = NOT_A_DICT_POS;
-}
-
-}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
deleted file mode 100644
index 3b36d425f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DictionaryBigramsStructurePolicy;
-class DictionaryShortcutsStructurePolicy;
-
-/*
- * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
- * node and reads node attributes.
- */
-class DynamicPatriciaTrieNodeReader {
- public:
- DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
- mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
- mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
- mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_DICT_POS) {}
-
- ~DynamicPatriciaTrieNodeReader() {}
-
- // Reads PtNode information from dictionary buffer and updates members with the information.
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
- fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
- 0 /* maxCodePointCount */, 0 /* outCodePoints */);
- }
-
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
- mSiblingPos = NOT_A_DICT_POS;
- mBigramLinkedNodePos = NOT_A_DICT_POS;
- fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
- }
-
- // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
- AK_FORCE_INLINE int getHeadPos() const {
- return mHeadPos;
- }
-
- // Flags
- AK_FORCE_INLINE bool isDeleted() const {
- return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
- }
-
- AK_FORCE_INLINE bool hasChildren() const {
- return mChildrenPos != NOT_A_DICT_POS;
- }
-
- AK_FORCE_INLINE bool isTerminal() const {
- return PatriciaTrieReadingUtils::isTerminal(mFlags);
- }
-
- AK_FORCE_INLINE bool isBlacklisted() const {
- return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
- }
-
- AK_FORCE_INLINE bool isNotAWord() const {
- return PatriciaTrieReadingUtils::isNotAWord(mFlags);
- }
-
- // Parent node position
- AK_FORCE_INLINE int getParentPos() const {
- return mParentPos;
- }
-
- // Number of code points
- AK_FORCE_INLINE uint8_t getCodePointCount() const {
- return mCodePointCount;
- }
-
- // Probability
- AK_FORCE_INLINE int getProbabilityFieldPos() const {
- return mProbabilityFieldPos;
- }
-
- AK_FORCE_INLINE int getProbability() const {
- return mProbability;
- }
-
- // Children PtNode array position
- AK_FORCE_INLINE int getChildrenPosFieldPos() const {
- return mChildrenPosFieldPos;
- }
-
- AK_FORCE_INLINE int getChildrenPos() const {
- return mChildrenPos;
- }
-
- // Bigram linked node position.
- AK_FORCE_INLINE int getBigramLinkedNodePos() const {
- return mBigramLinkedNodePos;
- }
-
- // Shortcutlist position
- AK_FORCE_INLINE int getShortcutPos() const {
- return mShortcutPos;
- }
-
- // Bigrams position
- AK_FORCE_INLINE int getBigramsPos() const {
- return mBigramPos;
- }
-
- // Sibling node position
- AK_FORCE_INLINE int getSiblingNodePos() const {
- return mSiblingPos;
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
-
- const BufferWithExtendableBuffer *const mBuffer;
- const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
- const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- int mHeadPos;
- DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
- int mParentPos;
- uint8_t mCodePointCount;
- int mProbabilityFieldPos;
- int mProbability;
- int mChildrenPosFieldPos;
- int mChildrenPos;
- int mBigramLinkedNodePos;
- int mShortcutPos;
- int mBigramPos;
- int mSiblingPos;
-
- void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
- const int maxCodePointCount, int *const outCodePoints);
-
- void invalidatePtNodeInfo();
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
deleted file mode 100644
index 495b146c2..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
-
-#include <cstdio>
-#include <cstring>
-#include <ctime>
-
-#include "defines.h"
-#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
-
-namespace latinime {
-
-// Note that these are corresponding definitions in Java side in BinaryDictionaryTests and
-// BinaryDictionaryDecayingTests.
-const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
-const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
- "SET_NEEDS_TO_DECAY_FOR_TESTING";
-const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
-const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
- DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-
-void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
- DicNodeVector *const childDicNodes) const {
- if (!dicNode->hasChildren()) {
- return;
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- while (!readingHelper.isEnd()) {
- bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted();
- if (isTerminal && mHeaderPolicy.isDecayingDict()) {
- // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
- // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
- // valid terminal DicNode.
- isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY)
- != NOT_A_PROBABILITY;
- }
- childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
- nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal,
- nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
- nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
- readingHelper.readNextSiblingNode();
- }
-}
-
-int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const {
- // This method traverses parent nodes from the terminal by following parent pointers; thus,
- // node code points are stored in the buffer in the reverse order.
- int reverseCodePoints[maxCodePointCount];
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- // First, read the terminal node and get its probability.
- readingHelper.initWithPtNodePos(ptNodePos);
- if (!readingHelper.isValidTerminalNode()) {
- // Node at the ptNodePos is not a valid terminal node.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store terminal node probability.
- *outUnigramProbability = readingHelper.getNodeReader()->getProbability();
- // Then, following parent node link to the dictionary root and fetch node code points.
- while (!readingHelper.isEnd()) {
- if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
- // The ptNodePos is not a valid terminal node position in the dictionary.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store node code points to buffer in the reverse order.
- readingHelper.fetchMergedNodeCodePointsInReverseOrder(
- readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
- // Follow parent node toward the root node.
- readingHelper.readParentNode();
- }
- if (readingHelper.isError()) {
- // The node position or the dictionary is invalid.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Reverse the stored code points to output them.
- const int codePointCount = readingHelper.getTotalCodePointCount();
- for (int i = 0; i < codePointCount; ++i) {
- outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
- }
- return codePointCount;
-}
-
-int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const {
- int searchCodePoints[length];
- for (int i = 0; i < length; ++i) {
- searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(getRootPosition());
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- while (!readingHelper.isEnd()) {
- const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
- if (readingHelper.getTotalCodePointCount() > length
- || !readingHelper.isMatchedCodePoint(0 /* index */,
- searchCodePoints[matchedCodePointCount])) {
- // Current node has too many code points or its first code point is different from
- // target code point. Skip this node and read the next sibling node.
- readingHelper.readNextSiblingNode();
- continue;
- }
- // Check following merged node code points.
- const int nodeCodePointCount = nodeReader->getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- if (!readingHelper.isMatchedCodePoint(
- j, searchCodePoints[matchedCodePointCount + j])) {
- // Different code point is found. The given word is not included in the dictionary.
- return NOT_A_DICT_POS;
- }
- }
- // All characters are matched.
- if (length == readingHelper.getTotalCodePointCount()) {
- // Terminal position is found.
- return nodeReader->getHeadPos();
- }
- if (!nodeReader->hasChildren()) {
- return NOT_A_DICT_POS;
- }
- // Advance to the children nodes.
- readingHelper.readChildNode();
- }
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- return NOT_A_DICT_POS;
-}
-
-int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
- const int bigramProbability) const {
- if (mHeaderPolicy.isDecayingDict()) {
- return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
- } else {
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return ProbabilityUtils::backoff(unigramProbability);
- } else {
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
- bigramProbability);
- }
- }
-}
-
-int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_PROBABILITY;
- }
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
- return NOT_A_PROBABILITY;
- }
- return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
-}
-
-int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return nodeReader.getShortcutPos();
-}
-
-int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (nodeReader.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return nodeReader.getBigramsPos();
-}
-
-bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
- const int probability) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
- return false;
- }
- DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
- getBigramsStructurePolicy(), getShortcutsStructurePolicy());
- readingHelper.initWithPtNodeArrayPos(getRootPosition());
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- bool addedNewUnigram = false;
- if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
- &addedNewUnigram)) {
- if (addedNewUnigram) {
- mUnigramCount++;
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1, const int probability) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
- return false;
- }
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
- return false;
- }
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
- return false;
- }
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- bool addedNewBigram = false;
- if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
- if (addedNewBigram) {
- mBigramCount++;
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update.");
- return false;
- }
- const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
- return false;
- }
- const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
- return false;
- }
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
- if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
- mBigramCount--;
- return true;
- } else {
- return false;
- }
-}
-
-void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: flush() is called for non-updatable dictionary.");
- return;
- }
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
- writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
-}
-
-void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
- return;
- }
- const bool needsToDecay = mHeaderPolicy.isDecayingDict()
- && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
- false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
- DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
- &mShortcutListPolicy, needsToDecay);
- DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
- writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
- mNeedsToDecayForTesting = false;
-}
-
-bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
- if (!mBuffer->isUpdatable()) {
- AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
- return false;
- }
- if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
- // Additional buffer size is near the limit.
- return true;
- } else if (mHeaderPolicy.getExtendedRegionSize()
- + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
- > MAX_DICT_EXTENDED_REGION_SIZE) {
- // Total extended region size exceeds the limit.
- return true;
- } else if (mBufferWithExtendableBuffer.getTailPosition()
- >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
- && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
- // Needs to reduce dictionary size.
- return true;
- } else if (mHeaderPolicy.isDecayingDict()) {
- return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
- mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
- }
- return false;
-}
-
-void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
- const int maxResultLength) {
- if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mUnigramCount);
- } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mBigramCount);
- } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d",
- mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
- static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
- } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
- snprintf(outResult, maxResultLength, "%d",
- mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
- static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
- } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
- mNeedsToDecayForTesting = true;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
deleted file mode 100644
index be97ee1a5..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-class DicNode;
-class DicNodeVector;
-
-class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
- public:
- DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
- mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
- mShortcutListPolicy(&mBufferWithExtendableBuffer),
- mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
- mHeaderPolicy.isDecayingDict()),
- mUnigramCount(mHeaderPolicy.getUnigramCount()),
- mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
-
- ~DynamicPatriciaTriePolicy() {
- delete mBuffer;
- }
-
- AK_FORCE_INLINE int getRootPosition() const {
- return 0;
- }
-
- void createAndGetAllChildNodes(const DicNode *const dicNode,
- DicNodeVector *const childDicNodes) const;
-
- int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const;
-
- int getTerminalNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const;
-
- int getProbability(const int unigramProbability, const int bigramProbability) const;
-
- int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
-
- int getShortcutPositionOfPtNode(const int ptNodePos) const;
-
- int getBigramsPositionOfPtNode(const int ptNodePos) const;
-
- const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
- return &mHeaderPolicy;
- }
-
- const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
- return &mBigramListPolicy;
- }
-
- const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
- return &mShortcutListPolicy;
- }
-
- bool addUnigramWord(const int *const word, const int length, const int probability);
-
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability);
-
- bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1);
-
- void flush(const char *const filePath);
-
- void flushWithGC(const char *const filePath);
-
- bool needsToRunGC(const bool mindsBlockByGC) const;
-
- void getProperty(const char *const query, char *const outResult,
- const int maxResultLength);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
-
- static const char *const UNIGRAM_COUNT_QUERY;
- static const char *const BIGRAM_COUNT_QUERY;
- static const char *const MAX_UNIGRAM_COUNT_QUERY;
- static const char *const MAX_BIGRAM_COUNT_QUERY;
- static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
- static const int MAX_DICT_EXTENDED_REGION_SIZE;
- static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
-
- const MmappedBuffer *const mBuffer;
- const HeaderPolicy mHeaderPolicy;
- BufferWithExtendableBuffer mBufferWithExtendableBuffer;
- DynamicShortcutListPolicy mShortcutListPolicy;
- DynamicBigramListPolicy mBigramListPolicy;
- int mUnigramCount;
- int mBigramCount;
- int mNeedsToDecayForTesting;
-};
-} // namespace latinime
-#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
deleted file mode 100644
index 052558bfc..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
-// TODO: Make MAX_DICTIONARY_SIZE 8MB.
-const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
-
-bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
- DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- bool *const outAddedNewUnigram) {
- int parentPos = NOT_A_DICT_POS;
- while (!readingHelper->isEnd()) {
- const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
- if (!readingHelper->isMatchedCodePoint(0 /* index */,
- wordCodePoints[matchedCodePointCount])) {
- // The first code point is different from target code point. Skip this node and read
- // the next sibling node.
- readingHelper->readNextSiblingNode();
- continue;
- }
- // Check following merged node code points.
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
- const int nodeCodePointCount = nodeReader->getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- const int nextIndex = matchedCodePointCount + j;
- if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
- wordCodePoints[matchedCodePointCount + j])) {
- *outAddedNewUnigram = true;
- return reallocatePtNodeAndAddNewPtNodes(nodeReader,
- readingHelper->getMergedNodeCodePoints(), j,
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
- probability),
- wordCodePoints + matchedCodePointCount,
- codePointCount - matchedCodePointCount);
- }
- }
- // All characters are matched.
- if (codePointCount == readingHelper->getTotalCodePointCount()) {
- return setPtNodeProbability(nodeReader, probability,
- readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
- }
- if (!nodeReader->hasChildren()) {
- *outAddedNewUnigram = true;
- return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- wordCodePoints + readingHelper->getTotalCodePointCount(),
- codePointCount - readingHelper->getTotalCodePointCount());
- }
- // Advance to the children nodes.
- parentPos = nodeReader->getHeadPos();
- readingHelper->readChildNode();
- }
- if (readingHelper->isError()) {
- // The dictionary is invalid.
- return false;
- }
- int pos = readingHelper->getPosOfLastForwardLinkField();
- *outAddedNewUnigram = true;
- return createAndInsertNodeIntoPtNodeArray(parentPos,
- wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
- codePointCount - readingHelper->getPrevTotalCodePointCount(),
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability, bool *const outAddedNewBigram) {
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
- mMergedNodeCodePoints);
- // Move node to add bigram entry.
- const int newNodePos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
- return false;
- }
- int writingPos = newNodePos;
- // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
- mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
- &writingPos)) {
- return false;
- }
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
- if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
- // Insert a new bigram entry into the existing bigram list.
- int bigramListPos = nodeReader.getBigramsPos();
- return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
- outAddedNewBigram);
- } else {
- // The PtNode doesn't have a bigram list.
- *outAddedNewBigram = true;
- // First, Write a bigram entry at the tail position of the PtNode.
- if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
- return false;
- }
- // Then, Mark as the PtNode having bigram list in the flags.
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
- nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
- nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
- nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
- writingPos = newNodePos;
- // Write updated flags into the moved PtNode's flags field.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos);
- }
-}
-
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
- if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
- return false;
- }
- return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
-}
-
-void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
- const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
- mBuffer->getUsedAdditionalBufferSize();
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
- false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
- return;
- }
- DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
-}
-
-void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
- const char *const fileName, const HeaderPolicy *const headerPolicy) {
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
- MAX_DICTIONARY_SIZE);
- int unigramCount = 0;
- int bigramCount = 0;
- if (mNeedsToDecay) {
- ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
- }
- if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
- return;
- }
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
- return;
- }
- DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
-}
-
-bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
- const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
- int pos = nodeToUpdate->getHeadPos();
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
- true /* isDeleted */);
- int writingPos = nodeToUpdate->getHeadPos();
- // Update flags.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
- const int bigramLinkedNodePos) {
- int pos = originalNode->getHeadPos();
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
- false /* isDeleted */);
- int writingPos = originalNode->getHeadPos();
- // Update flags.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
- &writingPos)) {
- return false;
- }
- // Update moved position, which is stored in the parent offset field.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
- return false;
- }
- // Update bigram linked node position, which is stored in the children position field.
- int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
- return false;
- }
- if (originalNode->hasChildren()) {
- // Update children's parent position.
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
- readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
- while (!readingHelper.isEnd()) {
- int parentOffsetFieldPos = nodeReader->getHeadPos()
- + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
- &parentOffsetFieldPos)) {
- // Parent offset cannot be written because of a bug or a broken dictionary; thus,
- // we give up to update dictionary.
- return false;
- }
- readingHelper.readNextSiblingNode();
- }
- }
- return true;
-}
-
-// Write new PtNode at writingPos.
-bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
- BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
- const bool isNotAWord, const int parentPos, const int *const codePoints,
- const int codePointCount, const int probability, const int childrenPos,
- const int originalBigramListPos, const int originalShortcutListPos,
- int *const writingPos) {
- const int nodePos = *writingPos;
- // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
- // PtNode writing.
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
- 0 /* nodeFlags */, writingPos)) {
- return false;
- }
- // Calculate a parent offset and write the offset.
- if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
- parentPos, nodePos, writingPos)) {
- return false;
- }
- // Write code points
- if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
- codePoints, codePointCount, writingPos)) {
- return false;
- }
- // Write probability when the probability is a valid probability, which means this node is
- // terminal.
- if (probability != NOT_A_PROBABILITY) {
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
- probability, writingPos)) {
- return false;
- }
- }
- // Write children position
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
- childrenPos, writingPos)) {
- return false;
- }
- // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
- if (originalShortcutListPos != NOT_A_DICT_POS) {
- int fromPos = originalShortcutListPos;
- if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
- writingPos)) {
- return false;
- }
- }
- // Copy bigram list when the originalBigramListPos is valid dictionary position.
- int bigramCount = 0;
- if (originalBigramListPos != NOT_A_DICT_POS) {
- int fromPos = originalBigramListPos;
- if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
- return false;
- }
- }
- // Create node flags and write them.
- PatriciaTrieReadingUtils::NodeFlags nodeFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
- probability != NOT_A_PROBABILITY /* isTerminal */,
- originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
- bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
- CHILDREN_POSITION_FIELD_SIZE);
- int flagsFieldPos = nodePos;
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
- &flagsFieldPos)) {
- return false;
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
- BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
- false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
- NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
- NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
- BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos) {
- return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
- originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
- originalNode->getChildrenPos(), originalNode->getBigramsPos(),
- originalNode->getShortcutPos(), writingPos);
-}
-
-bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
- const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
- int *const forwardLinkFieldPos) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, forwardLinkFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
- probability);
-}
-
-bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
- const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
- const int *const codePoints, bool *const outAddedNewUnigram) {
- if (originalPtNode->isTerminal()) {
- // Overwrites the probability.
- *outAddedNewUnigram = false;
- const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
- probability);
- int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
- probabilityToWrite, &probabilityFieldPos)) {
- return false;
- }
- } else {
- // Make the node terminal and write the probability.
- *outAddedNewUnigram = true;
- int movedPos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
- return false;
- }
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
- originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
- getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
- &movedPos)) {
- return false;
- }
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, &childrenPosFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
- codePointCount, probability);
-}
-
-bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
- const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
- const int probability) {
- int writingPos = mBuffer->getTailPosition();
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- 1 /* arraySize */, &writingPos)) {
- return false;
- }
- if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
- probability, &writingPos)) {
- return false;
- }
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- return true;
-}
-
-// Returns whether the dictionary updating was succeeded or not.
-bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
- const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
- const int newNodeCodePointCount) {
- // When addsExtraChild is true, split the reallocating PtNode and add new child.
- // Reallocating PtNode: abcde, newNode: abcxy.
- // abc (1st, not terminal) __ de (2nd)
- // \_ xy (extra child, terminal)
- // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
- // Reallocating PtNode: abcde, newNode: abc.
- // abc (1st, terminal) __ de (2nd)
- const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
- const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
- int writingPos = firstPartOfReallocatedPtNodePos;
- // Write the 1st part of the reallocating node. The children position will be updated later
- // with actual children position.
- const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
- if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
- reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
- &writingPos)) {
- return false;
- }
- const int actualChildrenPos = writingPos;
- // Create new children PtNode array.
- const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
- if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- newPtNodeCount, &writingPos)) {
- return false;
- }
- // Write the 2nd part of the reallocating node.
- const int secondPartOfReallocatedPtNodePos = writingPos;
- if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
- firstPartOfReallocatedPtNodePos,
- reallocatingPtNodeCodePoints + overlappingCodePointCount,
- reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
- reallocatingPtNode->getProbability(), &writingPos)) {
- return false;
- }
- if (addsExtraChild) {
- if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
- newNodeCodePoints + overlappingCodePointCount,
- newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
- &writingPos)) {
- return false;
- }
- }
- if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Update original reallocatingPtNode as moved.
- if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
- secondPartOfReallocatedPtNodePos)) {
- return false;
- }
- // Load node info. Information of the 1st part will be fetched.
- DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
- // Update children position.
- int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
- if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
- actualChildrenPos, &childrenPosFieldPos)) {
- return false;
- }
- return true;
-}
-
-bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
- int *const outUnigramCount, int *const outBigramCount) {
- DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- headerPolicy, this, mBuffer, mNeedsToDecay);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
- return false;
- }
- if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
- // TODO: Remove more unigrams.
- }
-
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
- traversePolicyToUpdateBigramProbability(mBigramPolicy);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateBigramProbability)) {
- return false;
- }
- if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
- > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
- // TODO: Remove more bigrams.
- }
-
- // Mapping from positions in mBuffer to positions in bufferToWrite.
- DictPositionRelocationMap dictPositionRelocationMap;
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
- &dictPositionRelocationMap);
- if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
- return false;
- }
-
- // Create policy instance for the GCed dictionary.
- DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
- mNeedsToDecay);
- // Create reading helper for the GCed dictionary.
- DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
- &newDictShortcutPolicy);
- newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
- traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
- &dictPositionRelocationMap);
- if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToUpdateAllPositionFields)) {
- return false;
- }
- *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
- *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
- return true;
-}
-
-int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
- const int newProbability) {
- if (mNeedsToDecay) {
- return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
- newProbability);
- } else {
- return newProbability;
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
deleted file mode 100644
index ca8664729..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DynamicBigramListPolicy;
-class DynamicPatriciaTrieNodeReader;
-class DynamicPatriciaTrieReadingHelper;
-class DynamicShortcutListPolicy;
-class HeaderPolicy;
-
-class DynamicPatriciaTrieWritingHelper {
- public:
- typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
- typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
- struct DictPositionRelocationMap {
- public:
- DictPositionRelocationMap()
- : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
-
- PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
- PtNodePositionRelocationMap mPtNodePositionRelocationMap;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
- };
-
- static const size_t MAX_DICTIONARY_SIZE;
-
- DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
- DynamicBigramListPolicy *const bigramPolicy,
- DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
- mNeedsToDecay(needsToDecay) {}
-
- ~DynamicPatriciaTrieWritingHelper() {}
-
- // Add a word to the dictionary. If the word already exists, update the probability.
- bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- bool *const outAddedNewUnigram);
-
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
- bool *const outAddedNewBigram);
-
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
-
- void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
- const int unigramCount, const int bigramCount);
-
- void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
- const HeaderPolicy *const headerPolicy);
-
- // CAVEAT: This method must be called only from inner classes of
- // DynamicPatriciaTrieGcEventListeners.
- bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
-
- // CAVEAT: This method must be called only from this class or inner classes of
- // DynamicPatriciaTrieGcEventListeners.
- bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
- const int *const codePoints, const int codePointCount, const int probability,
- int *const writingPos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
-
- static const int CHILDREN_POSITION_FIELD_SIZE;
-
- BufferWithExtendableBuffer *const mBuffer;
- DynamicBigramListPolicy *const mBigramPolicy;
- DynamicShortcutListPolicy *const mShortcutPolicy;
- const bool mNeedsToDecay;
-
- bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
- const int movedPos, const int bigramLinkedNodePos);
-
- bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool isBlacklisted, const bool isNotAWord,
- const int parentPos, const int *const codePoints, const int codePointCount,
- const int probability, const int childrenPos, const int originalBigramListPos,
- const int originalShortcutListPos, int *const writingPos);
-
- bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const int parentPos, const int *const codePoints, const int codePointCount,
- const int probability, int *const writingPos);
-
- bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
-
- bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
- const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
-
- bool createChildrenPtNodeArrayAndAChildPtNode(
- const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
- const int *const codePoints, const int codePointCount);
-
- bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const int probability);
-
- bool reallocatePtNodeAndAddNewPtNodes(
- const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
- const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
- const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
- const int newNodeCodePointCount);
-
- bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
- BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
- int *const outBigramCount);
-
- int getUpdatedProbability(const int originalProbability, const int newProbability);
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index eb072fbaf..be7a3c228 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -20,6 +20,8 @@ namespace latinime {
// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
+const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
+ "REQUIRES_GERMAN_UMLAUT_PROCESSING";
// TODO: Change attribute string to "IS_DECAYING_DICT".
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
@@ -27,6 +29,9 @@ const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
+// Historical info is information that is needed to support decaying such as timestamp, level and
+// count.
+const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
@@ -63,6 +68,11 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const {
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
}
+bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
+ return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
+}
+
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
const int unigramCount, const int bigramCount, const int extendedRegionSize) const {
@@ -89,12 +99,12 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
if (updatesLastUpdatedTime) {
// Set current time as a last updated time.
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
- time(0));
+ TimeKeeper::peekCurrentTime());
}
if (updatesLastDecayedTime) {
// Set current time as a last updated time.
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY,
- time(0));
+ TimeKeeper::peekCurrentTime());
}
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
&writingPos)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index a9c7805a8..1208d2c2a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -17,37 +17,40 @@
#ifndef LATINIME_HEADER_POLICY_H
#define LATINIME_HEADER_POLICY_H
-#include <ctime>
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
// Reads information from existing dictionary buffer.
- HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
- : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
+ : mDictFormatVersion(formatVersion),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)),
+ LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {}
+ EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
// Constructs header information using an attribute map.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -56,30 +59,55 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
- mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
+ LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
+
+ // Temporary dummy header.
+ HeaderPolicy()
+ : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
+ mAttributeMap(), mMultiWordCostMultiplier(0.0f),
+ mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
+ mLastUpdatedTime(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
+ mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false) {}
~HeaderPolicy() {}
- AK_FORCE_INLINE int getSize() const {
- return mSize;
- }
-
- AK_FORCE_INLINE bool supportsDynamicUpdate() const {
- return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags);
+ virtual int getFormatVersionNumber() const {
+ // Conceptually this converts the symbolic value we use in the code into the
+ // hardcoded of the bytes in the file. But we want the constants to be the
+ // same so we use them for both here.
+ switch (mDictFormatVersion) {
+ case FormatUtils::VERSION_2:
+ return FormatUtils::VERSION_2;
+ case FormatUtils::VERSION_4:
+ return FormatUtils::VERSION_4;
+ default:
+ return FormatUtils::UNKNOWN_VERSION;
+ }
}
- AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ AK_FORCE_INLINE bool isValid() const {
+ // Decaying dictionary must have historical information.
+ if (!mIsDecayingDict) {
+ return true;
+ }
+ if (mHasHistoricalInfoOfWords) {
+ return true;
+ } else {
+ return false;
+ }
}
- AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
- return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags);
+ AK_FORCE_INLINE int getSize() const {
+ return mSize;
}
AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
@@ -90,6 +118,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mIsDecayingDict;
}
+ AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
+ return mRequiresGermanUmlautProcessing;
+ }
+
AK_FORCE_INLINE int getLastUpdatedTime() const {
return mLastUpdatedTime;
}
@@ -110,6 +142,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize;
}
+ AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
+ return mHasHistoricalInfoOfWords;
+ }
+
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
@@ -118,15 +154,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int unigramCount, const int bigramCount, const int extendedRegionSize) const;
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
+ DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
static const char *const LAST_DECAYED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
static const char *const EXTENDED_REGION_SIZE_KEY;
+ static const char *const HAS_HISTORICAL_INFO_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
@@ -135,14 +173,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mSize;
HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
+ const bool mRequiresGermanUmlautProcessing;
const bool mIsDecayingDict;
const int mLastUpdatedTime;
const int mLastDecayedTime;
const int mUnigramCount;
const int mBigramCount;
const int mExtendedRegionSize;
+ const bool mHasHistoricalInfoOfWords;
float readMultipleWordCostMultiplier() const;
+ bool readRequiresGermanUmlautProcessing() const;
static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 5ded8f6a1..6b4598642 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -35,22 +35,6 @@ const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
-// Flags for special processing
-// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
-// something very bad (like, the apocalypse) will happen. Please update both at the same time.
-const HeaderReadWriteUtils::DictionaryFlags
- HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
-const HeaderReadWriteUtils::DictionaryFlags
- HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
-const HeaderReadWriteUtils::DictionaryFlags
- HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
-
-// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
-const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
-const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
- "REQUIRES_GERMAN_UMLAUT_PROCESSING";
-const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
- "REQUIRES_FRENCH_LIGATURE_PROCESSING";
/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
// See the format of the header in the comment in
@@ -68,17 +52,7 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
/* static */ HeaderReadWriteUtils::DictionaryFlags
HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap,
- REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false /* defaultValue */);
- const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap,
- REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, false /* defaultValue */);
- const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap,
- SUPPORTS_DYNAMIC_UPDATE_KEY, false /* defaultValue */);
- DictionaryFlags dictflags = NO_FLAGS;
- dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0;
- dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0;
- dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0;
- return dictflags;
+ return NO_FLAGS;
}
/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
@@ -115,8 +89,8 @@ const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY
case FormatUtils::VERSION_2:
// Version 2 dictionary writing is not supported.
return false;
- case FormatUtils::VERSION_3:
- return buffer->writeUintAndAdvancePosition(3 /* data */,
+ case FormatUtils::VERSION_4:
+ return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index 225968323..fc24bbdd5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -37,18 +37,6 @@ class HeaderReadWriteUtils {
static DictionaryFlags getFlags(const uint8_t *const dictBuf);
- static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
- return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
- return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0;
- }
-
- static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
- return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
- }
-
static AK_FORCE_INLINE int getHeaderOptionsPosition() {
return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ HEADER_SIZE_FIELD_SIZE;
@@ -101,17 +89,8 @@ class HeaderReadWriteUtils {
static const int HEADER_FLAG_SIZE;
static const int HEADER_SIZE_FIELD_SIZE;
+ // Value for the "flags" field. It's unused at the moment.
static const DictionaryFlags NO_FLAGS;
- // Flags for special processing
- // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or
- // something very bad (like, the apocalypse) will happen. Please update both at the same time.
- static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
- static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
- static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
-
- static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
- static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
- static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
static void setIntAttributeInner(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int value);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
deleted file mode 100644
index bd3211f6a..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
-#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-/*
- * This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
- */
-class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
- public:
- explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
- : mBuffer(buffer) {}
-
- ~DynamicShortcutListPolicy() {}
-
- int getStartPos(const int pos) const {
- if (pos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
- }
-
- void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
- int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
- int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- const ShortcutListReadingUtils::ShortcutFlags flags =
- ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
- if (outHasNext) {
- *outHasNext = ShortcutListReadingUtils::hasNext(flags);
- }
- if (outIsWhitelist) {
- *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
- }
- if (outCodePoint) {
- *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
- buffer, maxCodePointCount, outCodePoint, pos);
- }
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
-
- void skipAllShortcuts(int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- const int shortcutListSize = ShortcutListReadingUtils
- ::getShortcutListSizeAndForwardPointer(buffer, pos);
- *pos += shortcutListSize;
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
-
- // Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
- // bufferToWrite and advance these positions after the shortcut lists. This returns whether
- // the copy was succeeded or not.
- bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
- int *const fromPos, int *const toPos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
- if (usesAdditionalBuffer) {
- *fromPos -= mBuffer->getOriginalBufferSize();
- }
- const int shortcutListSize = ShortcutListReadingUtils
- ::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
- fromPos);
- // Copy shortcut list size.
- if (!bufferToWrite->writeUintAndAdvancePosition(
- shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
- ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
- return false;
- }
- // Copy shortcut list.
- for (int i = 0; i < shortcutListSize; ++i) {
- const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
- mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
- if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
- return false;
- }
- }
- if (usesAdditionalBuffer) {
- *fromPos += mBuffer->getOriginalBufferSize();
- }
- return true;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
-
- const BufferWithExtendableBuffer *const mBuffer;
-};
-} // namespace latinime
-#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
new file mode 100644
index 000000000..ae863af57
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
+ const TerminalPositionLookupTable *const terminalPositionLookupTable)
+ : mShortcutDictContent(shortcutDictContent),
+ mTerminalPositionLookupTable(terminalPositionLookupTable) {}
+
+ ~Ver4ShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ // The first shortcut entry is located at the head position of the shortcut list.
+ return pos;
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ int probability = 0;
+ mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+ outCodePoint, outCodePointCount, &probability, outHasNext, pos);
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
+ }
+
+ bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
+ const int probability) {
+ const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+ if (shortcutListPos == NOT_A_DICT_POS) {
+ // Create shortcut list.
+ if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+ AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+ return false;
+ }
+ const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+ return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
+ false /* hasNext */, writingPos);
+ }
+ const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
+ codePoints, codePointCount);
+ if (entryPos == NOT_A_DICT_POS) {
+ // Add new entry to the shortcut list.
+ // Create new shortcut list.
+ if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
+ AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
+ return false;
+ }
+ int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
+ if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
+ codePointCount, probability, true /* hasNext */, &writingPos)) {
+ AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
+ writingPos);
+ return false;
+ }
+ return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
+ }
+ // Overwrite existing entry.
+ bool hasNext = false;
+ mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */,
+ 0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos);
+ if (!mShortcutDictContent->writeShortcutEntry(codePoints,
+ codePointCount, probability, hasNext, entryPos)) {
+ AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
+ entryPos);
+ return false;
+ }
+ return true;
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+ ShortcutDictContent *const mShortcutDictContent;
+ const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..c81c61d23
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <climits>
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory
+ ::newDictionaryStructureWithBufferPolicy(const char *const path,
+ const int bufOffset, const int size, const bool isUpdatable) {
+ if (FileUtils::existsDir(path)) {
+ // Given path represents a directory.
+ return newPolicyforDirectoryDict(path, isUpdatable);
+ } else {
+ if (isUpdatable) {
+ AKLOGE("One file dictionaries don't support updating. path: %s", path);
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+ }
+ return newPolicyforFileDict(path, bufOffset, size);
+ }
+}
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyforDirectoryDict(
+ const char *const path, const bool isUpdatable) {
+ const int headerFilePathBufSize = PATH_MAX + 1 /* terminator */;
+ char headerFilePath[headerFilePathBufSize];
+ getHeaderFilePathInDictDir(path, headerFilePathBufSize, headerFilePath);
+ // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
+ // MmappedBufferPtr if the instance has the responsibility.
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(headerFilePath,
+ isUpdatable);
+ if (!mmappedBuffer.get()) {
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+ }
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(),
+ mmappedBuffer.get()->getBufferSize())) {
+ case FormatUtils::VERSION_2:
+ AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
+ break;
+ case FormatUtils::VERSION_4: {
+ const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
+ char dictPath[dictDirPathBufSize];
+ if (!FileUtils::getFilePathWithoutSuffix(headerFilePath,
+ Ver4DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) {
+ AKLOGE("Dictionary file name is not valid as a ver4 dictionary. path: %s", path);
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+ }
+ const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
+ Ver4DictBuffers::openVer4DictBuffers(dictPath, mmappedBuffer);
+ if (!dictBuffers.get()->isValid()) {
+ AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
+ path);
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+ }
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+ new Ver4PatriciaTriePolicy(dictBuffers));
+ }
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
+ break;
+ }
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+}
+
+/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ DictionaryStructureWithBufferPolicyFactory::newPolicyforFileDict(
+ const char *const path, const int bufOffset, const int size) {
+ // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
+ // MmappedBufferPtr if the instance has the responsibility.
+ MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset,
+ size, false /* isUpdatable */);
+ if (!mmappedBuffer.get()) {
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+ }
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(),
+ mmappedBuffer.get()->getBufferSize())) {
+ case FormatUtils::VERSION_2:
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
+ new PatriciaTriePolicy(mmappedBuffer));
+ case FormatUtils::VERSION_4:
+ AKLOGE("Given path is a file but the format is version 4. path: %s", path);
+ break;
+ default:
+ AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
+ break;
+ }
+ ASSERT(false);
+ return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
+}
+
+/* static */ void DictionaryStructureWithBufferPolicyFactory::getHeaderFilePathInDictDir(
+ const char *const dictDirPath, const int outHeaderFileBufSize,
+ char *const outHeaderFilePath) {
+ const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+ char dictName[dictNameBufSize];
+ FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
+ snprintf(outHeaderFilePath, outHeaderFileBufSize, "%s/%s%s", dictDirPath,
+ dictName, Ver4DictConstants::HEADER_FILE_EXTENSION);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
index 8cebc3b16..45ab52931 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
@@ -21,16 +21,27 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
- static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
- const char *const path, const int bufOffset, const int size, const bool isUpdatable);
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyforDirectoryDict(const char *const path, const bool isUpdatable);
+
+ static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
+ newPolicyforFileDict(const char *const path, const int bufOffset, const int size);
+
+ static void getHeaderFilePathInDictDir(const char *const dirPath,
+ const int outHeaderFileBufSize, char *const outHeaderFilePath);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
new file mode 100644
index 000000000..8f42df6d2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+
+namespace latinime {
+
+bool DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
+ // children.
+ bool isUselessPtNode = !ptNodeParams->isTerminal();
+ if (ptNodeParams->isTerminal()) {
+ bool needsToKeepPtNode = true;
+ if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
+ &needsToKeepPtNode)) {
+ AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC.");
+ return false;
+ }
+ if (!needsToKeepPtNode) {
+ isUselessPtNode = true;
+ }
+ }
+ if (mChildrenValue > 0) {
+ isUselessPtNode = false;
+ } else if (ptNodeParams->isTerminal()) {
+ // Remove children as all children are useless.
+ if (!mPtNodeWriter->updateChildrenPosition(ptNodeParams,
+ NOT_A_DICT_POS /* newChildrenPosition */)) {
+ return false;
+ }
+ }
+ if (isUselessPtNode) {
+ // Current PtNode is no longer needed. Mark it as deleted.
+ if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) {
+ return false;
+ }
+ } else {
+ mValueStack.back() += 1;
+ if (ptNodeParams->isTerminal()) {
+ mValidUnigramCount += 1;
+ }
+ }
+ return true;
+}
+
+bool DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) {
+ int bigramEntryCount = 0;
+ if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
+ &bigramEntryCount)) {
+ return false;
+ }
+ mValidBigramEntryCount += bigramEntryCount;
+ }
+ return true;
+}
+
+// Writes dummy PtNode array size when the head of PtNode array is read.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onDescend(const int ptNodeArrayPos) {
+ mValidPtNodeCount = 0;
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
+ PtNodeWriter::PtNodeArrayPositionRelocationMap::value_type(ptNodeArrayPos, writingPos));
+ // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
+ // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
+ mPtNodeArraySizeFieldPos = writingPos;
+ return DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, 0 /* arraySize */, &writingPos);
+}
+
+// Write PtNode array terminal and actual PtNode array size.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onReadingPtNodeArrayTail() {
+ int writingPos = mBufferToWrite->getTailPosition();
+ // Write PtNode array terminal.
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Write actual PtNode array size.
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
+bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (ptNodeParams->isDeleted()) {
+ // Current PtNode is not written in new buffer because it has been deleted.
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ PtNodeWriter::PtNodePositionRelocationMap::value_type(
+ ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
+ return true;
+ }
+ int writingPos = mBufferToWrite->getTailPosition();
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ PtNodeWriter::PtNodePositionRelocationMap::value_type(
+ ptNodeParams->getHeadPos(), writingPos));
+ mValidPtNodeCount++;
+ // Writes current PtNode.
+ return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos);
+}
+
+bool DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ // Updates parent position.
+ int bigramCount = 0;
+ if (!mPtNodeWriter->updateAllPositionFields(ptNodeParams, mDictPositionRelocationMap,
+ &bigramCount)) {
+ return false;
+ }
+ mBigramCount += bigramCount;
+ if (ptNodeParams->isTerminal()) {
+ mUnigramCount++;
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
index 9755120b0..d8867754d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
@@ -14,37 +14,32 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+#ifndef LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
+#define LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/hash_map_compat.h"
namespace latinime {
-class DictionaryHeaderStructurePolicy;
+class PtNodeParams;
-class DynamicPatriciaTrieGcEventListeners {
+class DynamicPtGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
// TODO: Concatenate non-terminal PtNodes.
class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- const DictionaryHeaderStructurePolicy *const headerPolicy,
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
- : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
- mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
+ PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -66,8 +61,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidUnigramCount() const {
return mValidUnigramCount;
@@ -77,10 +71,7 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
- const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
- BufferWithExtendableBuffer *const mBuffer;
- const bool mIsDecayingDict;
+ PtNodeWriter *const mPtNodeWriter;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
@@ -89,11 +80,10 @@ class DynamicPatriciaTrieGcEventListeners {
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
class TraversePolicyToUpdateBigramProbability
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateBigramProbability(
- DynamicBigramListPolicy *const bigramPolicy)
- : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
+ TraversePolicyToUpdateBigramProbability(PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
@@ -101,8 +91,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;
@@ -111,19 +100,17 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
- DynamicBigramListPolicy *const mBigramPolicy;
+ PtNodeWriter *const mPtNodeWriter;
int mValidBigramEntryCount;
};
class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const bufferToWrite,
- DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- dictPositionRelocationMap)
- : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
+ PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite,
+ PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
+ : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
@@ -133,31 +120,24 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail();
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBufferToWrite;
- DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- mDictPositionRelocationMap;
+ PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap;
int mValidPtNodeCount;
int mPtNodeArraySizeFieldPos;
};
class TraversePolicyToUpdateAllPositionFields
- : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ : public DynamicPtReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateAllPositionFields(
- DynamicPatriciaTrieWritingHelper *const writingHelper,
- DynamicBigramListPolicy *const bigramPolicy,
- BufferWithExtendableBuffer *const bufferToWrite,
- const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- dictPositionRelocationMap)
- : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
- mBufferToWrite(bufferToWrite),
+ TraversePolicyToUpdateAllPositionFields(PtNodeWriter *const ptNodeWriter,
+ const PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
+ : mPtNodeWriter(ptNodeWriter),
mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
mBigramCount(0) {};
@@ -167,8 +147,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints);
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
int getUnigramCount() const {
return mUnigramCount;
@@ -181,17 +160,14 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
- DynamicPatriciaTrieWritingHelper *const mWritingHelper;
- DynamicBigramListPolicy *const mBigramPolicy;
- BufferWithExtendableBuffer *const mBufferToWrite;
- const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
- mDictPositionRelocationMap;
+ PtNodeWriter *const mPtNodeWriter;
+ const PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap;
int mUnigramCount;
int mBigramCount;
};
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtGcEventListeners);
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
+#endif /* LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
index f108c219f..b918e0765 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
@@ -14,22 +14,25 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "utils/char_utils.h"
namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
-const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
// Visits all PtNodes in post-order depth first manner.
// For example, visits c -> b -> y -> x -> a for the following dictionary:
// a _ b _ c
// \ x _ y
-bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
+bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
TraversingEventListener *const listener) {
bool alreadyVisitedChildren = false;
// Descend from the root to the root PtNode array.
@@ -37,22 +40,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
return false;
}
while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
if (!alreadyVisitedChildren) {
- if (mNodeReader.hasChildren()) {
+ if (ptNodeParams.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode();
+ readChildNode(ptNodeParams);
} else {
alreadyVisitedChildren = true;
}
} else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false;
}
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent.
@@ -85,7 +92,7 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
// For example, visits a -> b -> x -> c -> y for the following dictionary:
// a _ b _ c
// \ x _ y
-bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener) {
bool alreadyVisitedAllPtNodesInArray = false;
bool alreadyVisitedChildren = false;
@@ -101,10 +108,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
}
pushReadingStateToStack();
while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
if (alreadyVisitedAllPtNodesInArray) {
if (alreadyVisitedChildren) {
// Move to next sibling PtNode's children.
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
// Return to the parent PTNode.
if (!listener->onAscend()) {
@@ -120,13 +131,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
alreadyVisitedChildren = false;
}
} else {
- if (mNodeReader.hasChildren()) {
+ if (ptNodeParams.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(mNodeReader.getChildrenPos())) {
+ if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode();
+ readChildNode(ptNodeParams);
// Push state to return the head of PtNode array.
pushReadingStateToStack();
alreadyVisitedAllPtNodesInArray = false;
@@ -136,10 +147,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
}
}
} else {
- if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
+ if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false;
}
- readNextSiblingNode();
+ readNextSiblingNode(ptNodeParams);
if (isEnd()) {
if (!listener->onReadingPtNodeArrayTail()) {
return false;
@@ -158,9 +169,95 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
return !isError();
}
+int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
+ // This method traverses parent nodes from the terminal by following parent pointers; thus,
+ // node code points are stored in the buffer in the reverse order.
+ int reverseCodePoints[maxCodePointCount];
+ const PtNodeParams terminalPtNodeParams(getPtNodeParams());
+ // First, read the terminal node and get its probability.
+ if (!isValidTerminalNode(terminalPtNodeParams)) {
+ // Node at the ptNodePos is not a valid terminal node.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store terminal node probability.
+ *outUnigramProbability = terminalPtNodeParams.getProbability();
+ // Then, following parent node link to the dictionary root and fetch node code points.
+ int totalCodePointCount = 0;
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ totalCodePointCount = getTotalCodePointCount(ptNodeParams);
+ if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
+ // The ptNodePos is not a valid terminal node position in the dictionary.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Store node code points to buffer in the reverse order.
+ fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
+ reverseCodePoints);
+ // Follow parent node toward the root node.
+ readParentNode(ptNodeParams);
+ }
+ if (isError()) {
+ // The node position or the dictionary is invalid.
+ *outUnigramProbability = NOT_A_PROBABILITY;
+ return 0;
+ }
+ // Reverse the stored code points to output them.
+ for (int i = 0; i < totalCodePointCount; ++i) {
+ outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
+ }
+ return totalCodePointCount;
+}
+
+int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) {
+ int searchCodePoints[length];
+ for (int i = 0; i < length; ++i) {
+ searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+ }
+ while (!isEnd()) {
+ const PtNodeParams ptNodeParams(getPtNodeParams());
+ const int matchedCodePointCount = getPrevTotalCodePointCount();
+ if (getTotalCodePointCount(ptNodeParams) > length
+ || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
+ searchCodePoints[matchedCodePointCount])) {
+ // Current node has too many code points or its first code point is different from
+ // target code point. Skip this node and read the next sibling node.
+ readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
+ // Different code point is found. The given word is not included in the dictionary.
+ return NOT_A_DICT_POS;
+ }
+ }
+ // All characters are matched.
+ if (length == getTotalCodePointCount(ptNodeParams)) {
+ if (!ptNodeParams.isTerminal()) {
+ return NOT_A_DICT_POS;
+ }
+ // Terminal position is found.
+ return ptNodeParams.getHeadPos();
+ }
+ if (!ptNodeParams.hasChildren()) {
+ return NOT_A_DICT_POS;
+ }
+ // Advance to the children nodes.
+ readChildNode(ptNodeParams);
+ }
+ // If we already traversed the tree further than the word is long, there means
+ // there was no match (or we would have found it).
+ return NOT_A_DICT_POS;
+}
+
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
-void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
+void DynamicPtReadingHelper::nextPtNodeArray() {
if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
// Reading invalid position because of a bug or a broken dictionary.
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
@@ -170,42 +267,48 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
+ mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
- mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
- dictBuf, &mReadingState.mPos);
+ mReadingState.mRemainingPtNodeCountInThisArray =
+ PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
+ &mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
// Count up nodes and node arrays to avoid infinite loop.
- mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
- mReadingState.mNodeArrayCount++;
- if (mReadingState.mNodeCount < 0
- || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
- || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ mReadingState.mTotalPtNodeIndexInThisArrayChain +=
+ mReadingState.mRemainingPtNodeCountInThisArray;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain++;
+ if (mReadingState.mRemainingPtNodeCountInThisArray < 0
+ || mReadingState.mTotalPtNodeIndexInThisArrayChain
+ > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mReadingState.mPtNodeArrayIndexInThisArrayChain
+ > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
- mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
- MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
- MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ mReadingState.mRemainingPtNodeCountInThisArray,
+ mReadingState.mTotalPtNodeIndexInThisArrayChain,
+ MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+ mReadingState.mPtNodeArrayIndexInThisArrayChain,
+ MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false);
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- if (mReadingState.mNodeCount == 0) {
+ if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
// Empty node array. Try following forward link.
followForwardLink();
}
}
// Follow the forward link and read the next node array if exists.
-void DynamicPatriciaTrieReadingHelper::followForwardLink() {
+void DynamicPtReadingHelper::followForwardLink() {
if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
// Reading invalid position because of bug or broken dictionary.
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
@@ -221,12 +324,12 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
const int forwardLinkPosition =
- DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
+ DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
- if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
+ if (DynamicPtReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link.
mReadingState.mPos += forwardLinkPosition;
nextPtNodeArray();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
index a71c06971..a69490943 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -14,16 +14,15 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+#ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H
+#define LATINIME_DYNAMIC_PT_READING_HELPER_H
#include <cstddef>
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
namespace latinime {
@@ -35,7 +34,7 @@ class DictionaryShortcutsStructurePolicy;
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
*/
-class DynamicPatriciaTrieReadingHelper {
+class DynamicPtReadingHelper {
public:
class TraversingEventListener {
public:
@@ -51,8 +50,7 @@ class DynamicPatriciaTrieReadingHelper {
virtual bool onReadingPtNodeArrayTail() = 0;
// Returns whether the event handling was succeeded or not.
- virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) = 0;
+ virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
protected:
TraversingEventListener() {};
@@ -61,13 +59,12 @@ class DynamicPatriciaTrieReadingHelper {
DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
};
- DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
- const DictionaryBigramsStructurePolicy *const bigramsPolicy,
- const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer,
+ const PtNodeReader *const ptNodeReader)
: mIsError(false), mReadingState(), mBuffer(buffer),
- mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
+ mPtNodeReader(ptNodeReader), mReadingStateStack() {}
- ~DynamicPatriciaTrieReadingHelper() {}
+ ~DynamicPtReadingHelper() {}
AK_FORCE_INLINE bool isError() const {
return mIsError;
@@ -84,15 +81,12 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodeArrayPos;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear();
nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
}
}
@@ -103,94 +97,88 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodePos;
- mReadingState.mNodeCount = 1;
- mReadingState.mPrevTotalCodePointCount = 0;
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mTotalCodePointCountSinceInitialization = 0;
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
- fetchPtNodeInfo();
}
}
- AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
- return &mNodeReader;
+ AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
+ if (isEnd()) {
+ return PtNodeParams();
+ }
+ return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos);
}
- AK_FORCE_INLINE bool isValidTerminalNode() const {
- return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
+ AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
+ return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
}
- AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
- return mMergedNodeCodePoints[index] == codePoint;
+ AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
+ const int codePoint) const {
+ return ptNodeParams.getCodePoints()[index] == codePoint;
}
// Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount;
+ return mReadingState.mTotalCodePointCountSinceInitialization;
}
// Return code point count include the last read node's code points.
- AK_FORCE_INLINE int getTotalCodePointCount() const {
- return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
+ AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
+ return mReadingState.mTotalCodePointCountSinceInitialization
+ + ptNodeParams.getCodePointCount();
}
- AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
const int index, int *const outCodePoints) const {
- const int nodeCodePointCount = mNodeReader.getCodePointCount();
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ const int *const nodeCodePoints = ptNodeParams.getCodePoints();
for (int i = 0; i < nodeCodePointCount; ++i) {
- outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
+ outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
}
}
- AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
- return mMergedNodeCodePoints;
- }
-
- AK_FORCE_INLINE void readNextSiblingNode() {
- mReadingState.mNodeCount -= 1;
- mReadingState.mPos = mNodeReader.getSiblingNodePos();
- if (mReadingState.mNodeCount <= 0) {
+ AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
+ mReadingState.mRemainingPtNodeCountInThisArray -= 1;
+ mReadingState.mPos = ptNodeParams.getSiblingNodePos();
+ if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
// All nodes in the current node array have been read.
followForwardLink();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
- } else {
- fetchPtNodeInfo();
}
}
// Read the first child node of the current node.
- AK_FORCE_INLINE void readChildNode() {
- if (mNodeReader.hasChildren()) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 0;
- mReadingState.mNodeArrayCount = 0;
- mReadingState.mPos = mNodeReader.getChildrenPos();
+ AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.hasChildren()) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+ mReadingState.mPos = ptNodeParams.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
nextPtNodeArray();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
}
// Read the parent node of the current node.
- AK_FORCE_INLINE void readParentNode() {
- if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
- mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
- mReadingState.mTotalNodeCount = 1;
- mReadingState.mNodeArrayCount = 1;
- mReadingState.mNodeCount = 1;
- mReadingState.mPos = mNodeReader.getParentPos();
+ AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
+ if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
+ mReadingState.mTotalCodePointCountSinceInitialization +=
+ ptNodeParams.getCodePointCount();
+ mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+ mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+ mReadingState.mRemainingPtNodeCountInThisArray = 1;
+ mReadingState.mPos = ptNodeParams.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
- fetchPtNodeInfo();
+ mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
@@ -201,13 +189,7 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
- return mReadingState.mPosOfLastPtNodeArrayHead;
- }
-
- AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
+ return mReadingState.mPosOfThisPtNodeArrayHead;
}
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
@@ -215,53 +197,54 @@ class DynamicPatriciaTrieReadingHelper {
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener);
+ int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
+ int *const outCodePoints, int *const outUnigramProbability);
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
+ const bool forceLowerCaseSearch);
+
private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
+ DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper);
- class ReadingState {
+ // This class encapsulates the reading state of a position in the dictionary. It points at a
+ // specific PtNode in the dictionary.
+ class PtNodeReadingState {
public:
// Note that copy constructor and assignment operator are used for this class to use
// std::vector.
- ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
- mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
- mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
+ PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
+ mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
+ mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
int mPos;
- // Node count of a node array.
- int mNodeCount;
- int mPrevTotalCodePointCount;
- int mTotalNodeCount;
- int mNodeArrayCount;
+ // Remaining node count in the current array.
+ int mRemainingPtNodeCountInThisArray;
+ int mTotalCodePointCountSinceInitialization;
+ // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
+ int mTotalPtNodeIndexInThisArrayChain;
+ // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+ // PtNode arrays.
+ int mPtNodeArrayIndexInThisArrayChain;
int mPosOfLastForwardLinkField;
- int mPosOfLastPtNodeArrayHead;
+ int mPosOfThisPtNodeArrayHead;
};
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
- static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
// TODO: Introduce error code to track what caused the error.
bool mIsError;
- ReadingState mReadingState;
+ PtNodeReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
- DynamicPatriciaTrieNodeReader mNodeReader;
- int mMergedNodeCodePoints[MAX_WORD_LENGTH];
- std::vector<ReadingState> mReadingStateStack;
+ const PtNodeReader *const mPtNodeReader;
+ std::vector<PtNodeReadingState> mReadingStateStack;
void nextPtNodeArray();
void followForwardLink();
- AK_FORCE_INLINE void fetchPtNodeInfo() {
- mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
- MAX_WORD_LENGTH, mMergedNodeCodePoints);
- if (mNodeReader.getCodePointCount() <= 0) {
- // Empty node is not allowed.
- mIsError = true;
- mReadingState.mPos = NOT_A_DICT_POS;
- }
- }
-
AK_FORCE_INLINE void pushReadingStateToStack() {
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
@@ -279,11 +262,8 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
- if (!isEnd()) {
- fetchPtNodeInfo();
- }
}
}
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
+#endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
index d68446db6..3586b50ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
@@ -14,38 +14,38 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
-typedef DynamicPatriciaTrieReadingUtils DptReadingUtils;
-
-const DptReadingUtils::NodeFlags DptReadingUtils::MASK_MOVED = 0xC0;
-const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
-const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
-const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::MASK_MOVED = 0xC0;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_MOVED = 0x40;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_DELETED = 0x80;
+const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_WILL_BECOME_NON_TERMINAL = 0x00;
// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
-const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
-const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+const int DynamicPtReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
-/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
+/* static */ int DynamicPtReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
const int pos) {
int linkAddressPos = pos;
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
}
-/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+/* static */ int DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
-/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
+/* static */ int DynamicPtReadingUtils::getParentPtNodePos(const int parentOffset,
+ const int ptNodePos) {
if (parentOffset == DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
} else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
@@ -55,7 +55,7 @@ const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
}
}
-/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
+/* static */ int DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const int base = *pos;
const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
index 67c3cc57e..89ae12c0b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+#ifndef LATINIME_DYNAMIC_PT_READING_UTILS_H
+#define LATINIME_DYNAMIC_PT_READING_UTILS_H
#include <stdint.h>
@@ -23,7 +23,7 @@
namespace latinime {
-class DynamicPatriciaTrieReadingUtils {
+class DynamicPtReadingUtils {
public:
typedef uint8_t NodeFlags;
@@ -54,22 +54,30 @@ class DynamicPatriciaTrieReadingUtils {
return FLAG_IS_DELETED == (MASK_MOVED & flags);
}
+ static AK_FORCE_INLINE bool willBecomeNonTerminal(const NodeFlags flags) {
+ return FLAG_WILL_BECOME_NON_TERMINAL == (MASK_MOVED & flags);
+ }
+
static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags,
- const bool isMoved, const bool isDeleted) {
+ const bool isMoved, const bool isDeleted, const bool willBecomeNonTerminal) {
NodeFlags flags = originalFlags;
+ flags = willBecomeNonTerminal ?
+ ((flags & (~MASK_MOVED)) | FLAG_WILL_BECOME_NON_TERMINAL) : flags;
flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags;
flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags;
- flags = (!isMoved && !isDeleted) ? ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
+ flags = (!isMoved && !isDeleted && !willBecomeNonTerminal) ?
+ ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
return flags;
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtReadingUtils);
static const NodeFlags MASK_MOVED;
static const NodeFlags FLAG_IS_NOT_MOVED;
static const NodeFlags FLAG_IS_MOVED;
static const NodeFlags FLAG_IS_DELETED;
+ static const NodeFlags FLAG_WILL_BECOME_NON_TERMINAL;
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H */
+#endif /* LATINIME_DYNAMIC_PT_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
new file mode 100644
index 000000000..2457b49c8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+bool DynamicPtUpdatingHelper::addUnigramWord(
+ DynamicPtReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ const bool isNotAWord, const bool isBlacklisted, const int timestamp,
+ bool *const outAddedNewUnigram) {
+ int parentPos = NOT_A_DICT_POS;
+ while (!readingHelper->isEnd()) {
+ const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
+ wordCodePoints[matchedCodePointCount])) {
+ // The first code point is different from target code point. Skip this node and read
+ // the next sibling node.
+ readingHelper->readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
+ wordCodePoints[matchedCodePointCount + j])) {
+ *outAddedNewUnigram = true;
+ return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
+ probability, timestamp, wordCodePoints + matchedCodePointCount,
+ codePointCount - matchedCodePointCount);
+ }
+ }
+ // All characters are matched.
+ if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
+ return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
+ timestamp, outAddedNewUnigram);
+ }
+ if (!ptNodeParams.hasChildren()) {
+ *outAddedNewUnigram = true;
+ return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
+ isNotAWord, isBlacklisted, probability, timestamp,
+ wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
+ codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
+ }
+ // Advance to the children nodes.
+ parentPos = ptNodeParams.getHeadPos();
+ readingHelper->readChildNode(ptNodeParams);
+ }
+ if (readingHelper->isError()) {
+ // The dictionary is invalid.
+ return false;
+ }
+ int pos = readingHelper->getPosOfLastForwardLinkField();
+ *outAddedNewUnigram = true;
+ return createAndInsertNodeIntoPtNodeArray(parentPos,
+ wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
+ codePointCount - readingHelper->getPrevTotalCodePointCount(),
+ isNotAWord, isBlacklisted, probability, timestamp, &pos);
+}
+
+bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
+ const int probability, const int timestamp, bool *const outAddedNewBigram) {
+ const PtNodeParams sourcePtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
+ const PtNodeParams targetPtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
+ return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
+ timestamp, outAddedNewBigram);
+}
+
+// Remove a bigram relation from word0Pos to word1Pos.
+bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
+ const PtNodeParams sourcePtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
+ const PtNodeParams targetPtNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
+ return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
+}
+
+bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability) {
+ const PtNodeParams ptNodeParams(mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(wordPos));
+ return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount,
+ shortcutProbability);
+}
+
+bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
+ const int *const nodeCodePoints, const int nodeCodePointCount,
+ const bool isNotAWord, const bool isBlacklisted, const int probability,
+ const int timestamp, int *const forwardLinkFieldPos) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ newPtNodeArrayPos, forwardLinkFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
+ isNotAWord, isBlacklisted, probability, timestamp);
+}
+
+bool DynamicPtUpdatingHelper::setPtNodeProbability(
+ const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp,
+ bool *const outAddedNewUnigram) {
+ if (originalPtNodeParams->isTerminal()) {
+ // Overwrites the probability.
+ *outAddedNewUnigram = false;
+ return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp);
+ } else {
+ // Make the node terminal and write the probability.
+ *outAddedNewUnigram = true;
+ const int movedPos = mBuffer->getTailPosition();
+ int writingPos = movedPos;
+ const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
+ isNotAWord, isBlacklisted, true /* isTerminal */,
+ originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
+ originalPtNodeParams->getCodePoints(), probability));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+ timestamp, &writingPos)) {
+ return false;
+ }
+ if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+ const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp,
+ const int *const codePoints, const int codePointCount) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
+ codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
+}
+
+bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
+ const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
+ const bool isNotAWord, const bool isBlacklisted, const int probability,
+ const int timestamp) {
+ int writingPos = mBuffer->getTailPosition();
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ 1 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ isNotAWord, isBlacklisted, true /* isTerminal */,
+ parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
+ &writingPos)) {
+ return false;
+ }
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Returns whether the dictionary updating was succeeded or not.
+bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
+ const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add new child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. The children position will be updated later
+ // with actual children position.
+ if (addsExtraChild) {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
+ reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
+ return false;
+ }
+ } else {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ isNotAWord, isBlacklisted, true /* isTerminal */,
+ reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+ timestamp, &writingPos)) {
+ return false;
+ }
+ }
+ const int actualChildrenPos = writingPos;
+ // Create new children PtNode array.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
+ reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
+ reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
+ reallocatingPtNodeParams->getProbability()));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
+ isNotAWord, isBlacklisted, true /* isTerminal */,
+ firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
+ newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
+ timestamp, &writingPos)) {
+ return false;
+ }
+ }
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Update original reallocating PtNode as moved.
+ if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Load node info. Information of the 1st part will be fetched.
+ const PtNodeParams ptNodeParams(
+ mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
+ // Update children position.
+ return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
+}
+
+const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
+ const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
+ const bool isBlacklisted, const bool isTerminal, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability) const {
+ const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
+ isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
+ originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
+ probability);
+}
+
+const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int probability) const {
+ const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
+ isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+ false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
new file mode 100644
index 000000000..71f473096
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
+#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicPtReadingHelper;
+class PtNodeReader;
+class PtNodeWriter;
+
+class DynamicPtUpdatingHelper {
+ public:
+ DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer,
+ const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
+ : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
+
+ ~DynamicPtUpdatingHelper() {}
+
+ // Add a word to the dictionary. If the word already exists, update the probability.
+ bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ const bool isNotAWord, const bool isBlacklisted, const int timestamp,
+ bool *const outAddedNewUnigram);
+
+ // Add a bigram relation from word0Pos to word1Pos.
+ bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
+ const int timestamp, bool *const outAddedNewBigram);
+
+ // Remove a bigram relation from word0Pos to word1Pos.
+ bool removeBigramWords(const int word0Pos, const int word1Pos);
+
+ // Add a shortcut target.
+ bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
+ const int targetCodePointCount, const int shortcutProbability);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ const PtNodeReader *const mPtNodeReader;
+ PtNodeWriter *const mPtNodeWriter;
+
+ bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
+ const int probability, const int timestamp, int *const forwardLinkFieldPos);
+
+ bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
+ const bool isBlacklisted, const int probability, const int timestamp,
+ bool *const outAddedNewUnigram);
+
+ bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
+ const bool isNotAWord, const bool isBlacklisted, const int probability,
+ const int timestamp, const int *const codePoints, const int codePointCount);
+
+ bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
+ const int probability, const int timestamp);
+
+ bool reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
+ const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
+ const int timestamp, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount);
+
+ const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
+ const int parentPos, const int codePointCount,
+ const int *const codePoints, const int probability) const;
+
+ const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
+ const bool isTerminal, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability) const;
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
index 30ff10cd6..ebbdc2ea2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include <cstddef>
#include <cstdlib>
@@ -24,19 +24,18 @@
namespace latinime {
-const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
-const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
-const int DynamicPatriciaTrieWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
-const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
-const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
-const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
-const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
-const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
-const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
-const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
-const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
+const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
+const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
+const int DynamicPtWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
+const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
+const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
+const int DynamicPtWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
+const int DynamicPtWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
+const int DynamicPtWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
+const int DynamicPtWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
+const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
+/* static */ bool DynamicPtWritingUtils::writeEmptyDictionary(
BufferWithExtendableBuffer *const buffer, const int rootPos) {
int writingPos = rootPos;
if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) {
@@ -46,13 +45,13 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
&writingPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos) {
return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const size_t arraySize,
int *const arraySizeFieldPos) {
// Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
@@ -74,20 +73,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
}
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeFlagsAndAdvancePosition(
BufferWithExtendableBuffer *const buffer,
- const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
+ const DynamicPtReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
return buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE, nodeFlagsFieldPos);
}
// Note that parentOffset is offset from node's head position.
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
int *const parentPosFieldPos) {
return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int *const codePoints,
const int codePointCount, int *const codePointFieldPos) {
if (codePointCount <= 0) {
@@ -101,34 +100,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
hasMultipleCodePoints, codePointFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
- BufferWithExtendableBuffer *const buffer, const int probability,
- int *const probabilityFieldPos) {
- if (probability < 0 || probability > MAX_PROBABILITY) {
- AKLOGI("probability cannot be written because the probability is invalid: %d",
- probability);
- ASSERT(false);
- return false;
- }
- return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE,
- probabilityFieldPos);
-}
-
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+/* static */ bool DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
int *const childrenPositionFieldPos) {
return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
childrenPositionFieldPos);
}
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
- BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
- int *const offsetFieldPos) {
+/* static */ bool DynamicPtWritingUtils::writeDictOffset(BufferWithExtendableBuffer *const buffer,
+ const int targetPos, const int basePos, int *const offsetFieldPos) {
int offset = targetPos - basePos;
if (targetPos == NOT_A_DICT_POS) {
- offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ offset = DynamicPtReadingUtils::DICT_OFFSET_INVALID;
} else if (offset == 0) {
- offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ offset = DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET;
}
if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
AKLOGI("offset cannot be written because the offset is too large or too small: %d",
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
index af76bc6b5..362fbd1cc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
@@ -14,19 +14,19 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
-#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
+#ifndef LATINIME_DYNAMIC_PT_WRITING_UTILS_H
+#define LATINIME_DYNAMIC_PT_WRITING_UTILS_H
#include <cstddef>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
namespace latinime {
class BufferWithExtendableBuffer;
-class DynamicPatriciaTrieWritingUtils {
+class DynamicPtWritingUtils {
public:
static const int NODE_FLAG_FIELD_SIZE;
@@ -39,8 +39,15 @@ class DynamicPatriciaTrieWritingUtils {
static bool writePtNodeArraySizeAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const size_t arraySize, int *const arraySizeFieldPos);
+ static bool writeFlags(BufferWithExtendableBuffer *const buffer,
+ const DynamicPtReadingUtils::NodeFlags nodeFlags,
+ const int nodeFlagsFieldPos) {
+ int writingPos = nodeFlagsFieldPos;
+ return writeFlagsAndAdvancePosition(buffer, nodeFlags, &writingPos);
+ }
+
static bool writeFlagsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
+ const DynamicPtReadingUtils::NodeFlags nodeFlags,
int *const nodeFlagsFieldPos);
static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
@@ -49,14 +56,11 @@ class DynamicPatriciaTrieWritingUtils {
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
- static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const int probability, int *const probabilityFieldPos);
-
static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int childrenPosition, int *const childrenPositionFieldPos);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtWritingUtils);
static const size_t MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD;
static const size_t MAX_PTNODE_ARRAY_SIZE;
@@ -67,10 +71,9 @@ class DynamicPatriciaTrieWritingUtils {
static const int MAX_DICT_OFFSET_VALUE;
static const int MIN_DICT_OFFSET_VALUE;
static const int DICT_OFFSET_NEGATIVE_FLAG;
- static const int PROBABILITY_FIELD_SIZE;
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
const int basePos, int *const offsetFieldPos);
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
+#endif /* LATINIME_DYNAMIC_PT_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
new file mode 100644
index 000000000..84731eb17
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_PARAMS_H
+#define LATINIME_PT_NODE_PARAMS_H
+
+#include <cstring>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// This class has information of a PtNode. This class is immutable.
+class PtNodeParams {
+ public:
+ // Invalid PtNode.
+ PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
+ mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mSiblingPos(NOT_A_DICT_POS) {}
+
+ PtNodeParams(const PtNodeParams& ptNodeParams)
+ : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
+ mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
+ mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
+ mTerminalId(ptNodeParams.mTerminalId),
+ mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
+ mProbability(ptNodeParams.mProbability),
+ mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
+ mChildrenPos(ptNodeParams.mChildrenPos),
+ mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
+ mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
+ mSiblingPos(ptNodeParams.mSiblingPos) {
+ memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
+ }
+
+ // PtNode with a terminal id.
+ PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
+ const int parentPos, const int codePointCount, const int *const codePoints,
+ const int terminalIdFieldPos, const int terminalId, const int probability,
+ const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
+ : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
+ mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
+ mBigramPos(terminalId), mSiblingPos(siblingPos) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ // Construct new params by updating existing PtNode params.
+ PtNodeParams(const PtNodeParams *const ptNodeParams,
+ const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability)
+ : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
+ mTerminalId(ptNodeParams->getTerminalId()),
+ mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
+ mProbability(probability),
+ mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
+ mChildrenPos(ptNodeParams->getChildrenPos()),
+ mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
+ mShortcutPos(ptNodeParams->getShortcutPos()),
+ mBigramPos(ptNodeParams->getBigramsPos()),
+ mSiblingPos(ptNodeParams->getSiblingNodePos()) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
+ const int codePointCount, const int *const codePoints, const int probability)
+ : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos),
+ mCodePointCount(codePointCount), mCodePoints(),
+ mTerminalIdFieldPos(NOT_A_DICT_POS),
+ mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
+ mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
+ mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
+ mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
+ mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
+ memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+ return mCodePointCount > 0;
+ }
+
+ // Head position of the PtNode
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
+ }
+
+ // Flags
+ AK_FORCE_INLINE bool isDeleted() const {
+ return DynamicPtReadingUtils::isDeleted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool willBecomeNonTerminal() const {
+ return DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasChildren() const {
+ return mChildrenPos != NOT_A_DICT_POS;
+ }
+
+ AK_FORCE_INLINE bool isTerminal() const {
+ return PatriciaTrieReadingUtils::isTerminal(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isBlacklisted() const {
+ return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isNotAWord() const {
+ return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasBigrams() const {
+ return PatriciaTrieReadingUtils::hasBigrams(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasShortcutTargets() const {
+ return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
+ }
+
+ // Parent node position
+ AK_FORCE_INLINE int getParentPos() const {
+ return mParentPos;
+ }
+
+ // Number of code points
+ AK_FORCE_INLINE uint8_t getCodePointCount() const {
+ return mCodePointCount;
+ }
+
+ AK_FORCE_INLINE const int *getCodePoints() const {
+ return mCodePoints;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getTerminalIdFieldPos() const {
+ return mTerminalIdFieldPos;
+ }
+
+ AK_FORCE_INLINE int getTerminalId() const {
+ return mTerminalId;
+ }
+
+ // Probability
+ AK_FORCE_INLINE int getProbabilityFieldPos() const {
+ return mProbabilityFieldPos;
+ }
+
+ AK_FORCE_INLINE int getProbability() const {
+ return mProbability;
+ }
+
+ // Children PtNode array position
+ AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+ return mChildrenPosFieldPos;
+ }
+
+ AK_FORCE_INLINE int getChildrenPos() const {
+ return mChildrenPos;
+ }
+
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
+ // Shortcutlist position
+ AK_FORCE_INLINE int getShortcutPos() const {
+ return mShortcutPos;
+ }
+
+ // Bigrams position
+ AK_FORCE_INLINE int getBigramsPos() const {
+ return mBigramPos;
+ }
+
+ // Sibling node position
+ AK_FORCE_INLINE int getSiblingNodePos() const {
+ return mSiblingPos;
+ }
+
+ private:
+ // This class have a public copy constructor to be used as a return value.
+
+ // Disallowing the assignment operator.
+ PtNodeParams &operator=(PtNodeParams &ptNodeParams);
+
+ const int mHeadPos;
+ const PatriciaTrieReadingUtils::NodeFlags mFlags;
+ const int mParentPos;
+ const uint8_t mCodePointCount;
+ int mCodePoints[MAX_WORD_LENGTH];
+ const int mTerminalIdFieldPos;
+ const int mTerminalId;
+ const int mProbabilityFieldPos;
+ const int mProbability;
+ const int mChildrenPosFieldPos;
+ const int mChildrenPos;
+ const int mBigramLinkedNodePos;
+ const int mShortcutPos;
+ const int mBigramPos;
+ const int mSiblingPos;
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_PARAMS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
new file mode 100644
index 000000000..c6b2a8bed
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_READER_H
+#define LATINIME_PT_NODE_READER_H
+
+#include "defines.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+
+namespace latinime {
+
+// Interface class used to read PtNode information.
+class PtNodeReader {
+ public:
+ virtual ~PtNodeReader() {}
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0;
+
+ protected:
+ PtNodeReader() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
new file mode 100644
index 000000000..84dd6870e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PT_NODE_WRITER_H
+#define LATINIME_PT_NODE_WRITER_H
+
+#include "defines.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+// Interface class used to write PtNode information.
+class PtNodeWriter {
+ public:
+ typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
+ typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap {
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
+ virtual ~PtNodeWriter() {}
+
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos) = 0;
+
+ virtual bool markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
+
+ virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int probability, const int timestamp) = 0;
+
+ virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ bool *const outNeedsToKeepPtNode) = 0;
+
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition) = 0;
+
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos) = 0;
+
+ virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ const int timestamp, int *const ptNodeWritingPos) = 0;
+
+ virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ bool *const outAddedNewBigram) = 0;
+
+ virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam) = 0;
+
+ virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
+
+ virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount) = 0;
+
+ virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability) = 0;
+
+ protected:
+ PtNodeWriter() {};
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PtNodeWriter);
+};
+} // namespace latinime
+#endif /* LATINIME_PT_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 8a84bd261..960c1b936 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -15,22 +15,22 @@
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
-void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- int nextPos = dicNode->getChildrenPos();
+ int nextPos = dicNode->getChildrenPtNodeArrayPos();
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize);
@@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each node, we search
+// it is possible to check for this with advantageous complexity. For each PtNode array, we search
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
-// than the position we look for, and we have to descend the z node).
+// than the position we look for, and we have to descend the z PtNode).
/* Parameters :
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
@@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
- // only traverse nodes that are actually a part of the terminal we are searching, so each time
- // we enter this loop we are one depth level further than last time.
- // The only reason we count nodes is because we want to reduce the probability of infinite
+ // only traverse PtNodes that are actually a part of the terminal we are searching, so each
+ // time we enter this loop we are one depth level further than last time.
+ // The only reason we count PtNodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
@@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = true;
} else if (1 >= ptNodeCount) {
// However if we are on the LAST PtNode of this array, and we have NOT shot the
- // position we should descend THIS node. So we trick the lastCandidatePtNodePos
- // so that we will descend this PtNode, not the previous one.
+ // position we should descend THIS PtNode. So we trick the
+ // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
+ // one.
lastCandidatePtNodePos = startPos;
found = true;
} else {
@@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = false;
}
} else {
- // Even if we don't have children here, we could still be on the last PtNode of /
+ // Even if we don't have children here, we could still be on the last PtNode of
// this array. If this is the case, we should descend the last PtNode that had
// children, and their position is already in lastCandidatePtNodePos.
found = (1 >= ptNodeCount);
@@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
return 0;
}
-// This function gets the position of the terminal node of the exact matching word in the
+// This function gets the position of the terminal PtNode of the exact matching word in the
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
-int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition();
int wordPos = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 0f8662aea..2adafd22b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,28 +34,26 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
- mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ PatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
+ : mMmappedBuffer(mmappedBuffer),
+ mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_2),
+ mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
+ mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
+ - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
- ~PatriciaTriePolicy() {
- delete mBuffer;
- }
-
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildNodes(const DicNode *const dicNode,
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalNodePositionOfWord(const int *const inWord,
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -77,14 +76,17 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy;
}
- bool addUnigramWord(const int *const word, const int length, const int probability) {
+ bool addUnigramWord(const int *const word, const int length, const int probability,
+ const int *const shortcutTargetCodePoints, const int shortcutLength,
+ const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
+ const int timestamp) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability) {
+ const int length1, const int probability, const int timestamp) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
@@ -113,7 +115,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- void getProperty(const char *const query, char *const outResult,
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) {
// getProperty is not supported for this class.
if (maxResultLength > 0) {
@@ -121,10 +123,16 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
}
+ const UnigramProperty getUnigramProperty(const int *const codePoints,
+ const int codePointCount) const {
+ // getUnigramProperty is not supported.
+ return UnigramProperty();
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- const MmappedBuffer *const mBuffer;
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const int mDictBufferSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
index 7df55815f..82b3593c8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
index 8420ee95a..b28f58336 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
@@ -23,6 +23,7 @@
namespace latinime {
+// TODO: Move to pt_common
class PatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
new file mode 100644
index 000000000..cb9d450ec
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+ int *const bigramEntryPos) const {
+ const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
+ const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+ const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+ int probability = NOT_A_PROBABILITY;
+ int timestamp = NOT_A_TIMESTAMP;
+ int level = 0;
+ int count = 0;
+ if (mHasHistoricalInfo) {
+ probability = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+ timestamp = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+ level = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+ count = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+ } else {
+ probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
+ }
+ const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+ const int targetTerminalId =
+ (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
+ Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
+ if (mHasHistoricalInfo) {
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
+ } else {
+ return BigramEntry(hasNext, probability, targetTerminalId);
+ }
+}
+
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+ const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+ BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
+ const int bigramFlags = createAndGetBigramFlags(
+ mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
+ bigramEntryToWrite->hasNext());
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
+ return false;
+ }
+ if (mHasHistoricalInfo) {
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
+ Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+ bigramEntryToWrite->getProbability());
+ return false;
+ }
+ const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
+ if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+ historicalInfo->getTimeStamp());
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
+ historicalInfo->getLevel());
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
+ historicalInfo->getCount());
+ return false;
+ }
+ }
+ const int targetTerminalIdToWrite =
+ (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
+ Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
+ bigramEntryToWrite->getTargetTerminalId();
+ if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+ *entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
+ return false;
+ }
+ return true;
+}
+
+bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
+ int readingPos = bigramListPos;
+ int writingPos = toPos;
+ bool hasNext = true;
+ while (hasNext) {
+ const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
+ AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const BigramDictContent *const originalBigramDictContent,
+ int *const outBigramEntryCount) {
+ for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
+ it != terminalIdMap->end(); ++it) {
+ const int originalBigramListPos =
+ originalBigramDictContent->getBigramListHeadPos(it->first);
+ if (originalBigramListPos == NOT_A_DICT_POS) {
+ // This terminal does not have a bigram list.
+ continue;
+ }
+ const int bigramListPos = getContentBuffer()->getTailPosition();
+ int bigramEntryCount = 0;
+ // Copy bigram list with GC from original content.
+ if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
+ terminalIdMap, &bigramEntryCount)) {
+ AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+ originalBigramListPos, bigramListPos);
+ return false;
+ }
+ if (bigramEntryCount == 0) {
+ // All bigram entries are useless. This terminal does not have a bigram list.
+ continue;
+ }
+ *outBigramEntryCount += bigramEntryCount;
+ // Set bigram list position to the lookup table.
+ if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
+ AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+ it->second, bigramListPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+// Returns whether GC for the bigram list was succeeded or not.
+bool BigramDictContent::runGCBigramList(const int bigramListPos,
+ const BigramDictContent *const sourceBigramDictContent, const int toPos,
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ int *const outEntrycount) {
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ int writingPos = toPos;
+ int lastEntryPos = NOT_A_DICT_POS;
+ while (hasNext) {
+ const BigramEntry originalBigramEntry =
+ sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = originalBigramEntry.hasNext();
+ if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ continue;
+ }
+ TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+ terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
+ if (it == terminalIdMap->end()) {
+ // Target word has been removed.
+ continue;
+ }
+ lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
+ if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
+ AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
+ return false;
+ }
+ *outEntrycount += 1;
+ }
+ if (lastEntryPos != NOT_A_DICT_POS) {
+ // Update has next flag in the last written entry.
+ const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
+ false /* hasNext */);
+ if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
+ AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
new file mode 100644
index 000000000..ba2a05209
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
+#define LATINIME_BIGRAM_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class BigramDictContent : public SparseTableDictContent {
+ public:
+ BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+ const bool isUpdatable)
+ : SparseTableDictContent(dictPath,
+ Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+ mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ BigramDictContent(const bool hasHistoricalInfo)
+ : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+ mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ const BigramEntry getBigramEntry(const int bigramEntryPos) const {
+ int readingPos = bigramEntryPos;
+ return getBigramEntryAndAdvancePosition(&readingPos);
+ }
+
+ const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
+
+ // Returns head position of bigram list for a PtNode specified by terminalId.
+ int getBigramListHeadPos(const int terminalId) const {
+ const SparseTable *const addressLookupTable = getAddressLookupTable();
+ if (!addressLookupTable->contains(terminalId)) {
+ return NOT_A_DICT_POS;
+ }
+ return addressLookupTable->get(terminalId);
+ }
+
+ bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
+ int writingPos = entryWritingPos;
+ return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
+ }
+
+ bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
+ int *const entryWritingPos);
+
+ bool createNewBigramList(const int terminalId) {
+ const int bigramListPos = getContentBuffer()->getTailPosition();
+ return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
+ }
+
+ bool copyBigramList(const int bigramListPos, const int toPos);
+
+ bool flushToFile(const char *const dictPath) const {
+ return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::BIGRAM_FILE_EXTENSION);
+ }
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const BigramDictContent *const originalBigramDictContent,
+ int *const outBigramEntryCount);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+ int createAndGetBigramFlags(const int probability, const bool hasNext) const {
+ return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
+ | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
+ }
+
+ bool runGCBigramList(const int bigramListPos,
+ const BigramDictContent *const sourceBigramDictContent, const int toPos,
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ int *const outEntryCount);
+
+ bool mHasHistoricalInfo;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
new file mode 100644
index 000000000..2b0cbd93b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_ENTRY_H
+#define LATINIME_BIGRAM_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+class BigramEntry {
+ public:
+ BigramEntry(const BigramEntry& bigramEntry)
+ : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
+ mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
+
+ // Entry with historical information.
+ BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
+ : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
+ mTargetTerminalId(targetTerminalId) {}
+
+ // Entry with historical information.
+ BigramEntry(const bool hasNext, const int probability,
+ const HistoricalInfo *const historicalInfo, const int targetTerminalId)
+ : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
+ mTargetTerminalId(targetTerminalId) {}
+
+ const BigramEntry getInvalidatedEntry() const {
+ return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
+ }
+
+ const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
+ return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
+ }
+
+ const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
+ return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
+ }
+
+ const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
+ return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
+ }
+
+ const BigramEntry updateHistoricalInfoAndGetEntry(
+ const HistoricalInfo *const historicalInfo) const {
+ return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
+ }
+
+ bool isValid() const {
+ return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+ }
+
+ bool hasNext() const {
+ return mHasNext;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ bool hasHistoricalInfo() const {
+ return mHistoricalInfo.isValid();
+ }
+
+ const HistoricalInfo *getHistoricalInfo() const {
+ return &mHistoricalInfo;
+ }
+
+ int getTargetTerminalId() const {
+ return mTargetTerminalId;
+ }
+
+ private:
+ // Copy constructor is public to use this class as a type of return value.
+ DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
+ DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
+
+ const bool mHasNext;
+ const int mProbability;
+ const HistoricalInfo mHistoricalInfo;
+ const int mTargetTerminalId;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
new file mode 100644
index 000000000..0c2f47073
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_CONTENT_H
+#define LATINIME_DICT_CONTENT_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class DictContent {
+ public:
+ virtual ~DictContent() {}
+ virtual bool isValid() const = 0;
+
+ protected:
+ DictContent() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictContent);
+};
+} // namespace latinime
+#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
new file mode 100644
index 000000000..3b7c70efd
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
+ if (terminalId < 0 || terminalId >= mSize) {
+ // This method can be called with invalid terminal id during GC.
+ return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
+ }
+ const BufferWithExtendableBuffer *const buffer = getBuffer();
+ int entryPos = getEntryPos(terminalId);
+ const int flags = buffer->readUintAndAdvancePosition(
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
+ const int probability = buffer->readUintAndAdvancePosition(
+ Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
+ if (mHasHistoricalInfo) {
+ const int timestamp = buffer->readUintAndAdvancePosition(
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
+ const int level = buffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
+ const int count = buffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ return ProbabilityEntry(flags, probability, &historicalInfo);
+ } else {
+ return ProbabilityEntry(flags, probability);
+ }
+}
+
+bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
+ const ProbabilityEntry *const probabilityEntry) {
+ if (terminalId < 0) {
+ return false;
+ }
+ const int entryPos = getEntryPos(terminalId);
+ if (terminalId >= mSize) {
+ ProbabilityEntry dummyEntry;
+ // Write new entry.
+ int writingPos = getBuffer()->getTailPosition();
+ while (writingPos <= entryPos) {
+ // Fulfilling with dummy entries until writingPos.
+ if (!writeEntry(&dummyEntry, writingPos)) {
+ AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
+ return false;
+ }
+ writingPos += getEntrySize();
+ mSize++;
+ }
+ }
+ return writeEntry(probabilityEntry, entryPos);
+}
+
+bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
+ if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
+ ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
+ for (int i = 0; i < mSize; ++i) {
+ const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
+ if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
+ AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
+ return false;
+ }
+ }
+ return probabilityDictContentToWrite.flush(dictPath,
+ Ver4DictConstants::FREQ_FILE_EXTENSION);
+ } else {
+ return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
+ }
+}
+
+bool ProbabilityDictContent::runGC(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ProbabilityDictContent *const originalProbabilityDictContent) {
+ mSize = 0;
+ for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
+ it != terminalIdMap->end(); ++it) {
+ const ProbabilityEntry probabilityEntry =
+ originalProbabilityDictContent->getProbabilityEntry(it->first);
+ if (!setProbabilityEntry(it->second, &probabilityEntry)) {
+ AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
+ return false;
+ }
+ mSize++;
+ }
+ return true;
+}
+
+int ProbabilityDictContent::getEntrySize() const {
+ if (mHasHistoricalInfo) {
+ return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE
+ + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
+ } else {
+ return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE;
+ }
+}
+
+int ProbabilityDictContent::getEntryPos(const int terminalId) const {
+ return terminalId * getEntrySize();
+}
+
+bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
+ const int entryPos) {
+ BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
+ int writingPos = entryPos;
+ if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
+ AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
+ return false;
+ }
+ if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
+ Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
+ AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
+ return false;
+ }
+ if (mHasHistoricalInfo) {
+ const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
+ if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
+ AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
+ return false;
+ }
+ if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
+ AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
+ return false;
+ }
+ if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
+ AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
new file mode 100644
index 000000000..b065bc954
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
+#define LATINIME_PROBABILITY_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class ProbabilityEntry;
+
+class ProbabilityDictContent : public SingleDictContent {
+ public:
+ ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
+ const bool isUpdatable)
+ : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
+ mHasHistoricalInfo(hasHistoricalInfo),
+ mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
+
+ ProbabilityDictContent(const bool hasHistoricalInfo)
+ : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
+
+ const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
+
+ bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
+
+ bool flushToFile(const char *const dictPath) const;
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ProbabilityDictContent *const originalProbabilityDictContent);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
+
+ int getEntrySize() const;
+
+ int getEntryPos(const int terminalId) const;
+
+ bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
+
+ bool mHasHistoricalInfo;
+ int mSize;
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
new file mode 100644
index 000000000..36ba82be1
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PROBABILITY_ENTRY_H
+#define LATINIME_PROBABILITY_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+
+namespace latinime {
+
+class ProbabilityEntry {
+ public:
+ ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
+ : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
+ mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
+
+ // Dummy entry
+ ProbabilityEntry()
+ : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
+
+ // Entry without historical information
+ ProbabilityEntry(const int flags, const int probability)
+ : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
+
+ // Entry with historical information.
+ ProbabilityEntry(const int flags, const int probability,
+ const HistoricalInfo *const historicalInfo)
+ : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
+
+ const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
+ return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
+ }
+
+ const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
+ const HistoricalInfo *const historicalInfo) const {
+ return ProbabilityEntry(mFlags, mProbability, historicalInfo);
+ }
+
+ bool hasHistoricalInfo() const {
+ return mHistoricalInfo.isValid();
+ }
+
+ int getFlags() const {
+ return mFlags;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ const HistoricalInfo *getHistoricalInfo() const {
+ return &mHistoricalInfo;
+ }
+
+ private:
+ // Copy constructor is public to use this class as a type of return value.
+ DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
+
+ const int mFlags;
+ const int mProbability;
+ const HistoricalInfo mHistoricalInfo;
+};
+} // namespace latinime
+#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
new file mode 100644
index 000000000..29972a4e8
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+ bool *const outhasNext, int *const shortcutEntryPos) const {
+ const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
+ const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+ if (outProbability) {
+ *outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
+ }
+ if (outhasNext) {
+ *outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+ }
+ if (outCodePoint && outCodePointCount) {
+ shortcutListBuffer->readCodePointsAndAdvancePosition(
+ maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+ }
+}
+
+int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
+ const SparseTable *const addressLookupTable = getAddressLookupTable();
+ if (!addressLookupTable->contains(terminalId)) {
+ return NOT_A_DICT_POS;
+ }
+ return addressLookupTable->get(terminalId);
+}
+
+bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
+ return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_FILE_EXTENSION);
+}
+
+bool ShortcutDictContent::runGC(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ShortcutDictContent *const originalShortcutDictContent) {
+ for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
+ it != terminalIdMap->end(); ++it) {
+ const int originalShortcutListPos =
+ originalShortcutDictContent->getShortcutListHeadPos(it->first);
+ if (originalShortcutListPos == NOT_A_DICT_POS) {
+ continue;
+ }
+ const int shortcutListPos = getContentBuffer()->getTailPosition();
+ // Copy shortcut list from original content.
+ if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent,
+ shortcutListPos)) {
+ AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
+ originalShortcutListPos, shortcutListPos);
+ return false;
+ }
+ // Set shortcut list position to the lookup table.
+ if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) {
+ AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
+ it->second, shortcutListPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
+ const int shortcutListListPos = getContentBuffer()->getTailPosition();
+ return getUpdatableAddressLookupTable()->set(terminalId, shortcutListListPos);
+}
+
+bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
+ return copyShortcutListFromDictContent(shortcutListPos, this, toPos);
+}
+
+bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
+ const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
+ bool hasNext = true;
+ int readingPos = shortcutListPos;
+ int writingPos = toPos;
+ int codePoints[MAX_WORD_LENGTH];
+ while (hasNext) {
+ int probability = 0;
+ int codePointCount = 0;
+ sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
+ codePoints, &codePointCount, &probability, &hasNext, &readingPos);
+ if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability,
+ hasNext, &writingPos)) {
+ AKLOGE("Cannot write shortcut entry to copy. pos: %d", writingPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
+ BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
+ const int shortcutFlags = shortcutListBuffer->readUint(
+ Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+ const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
+ const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext);
+ return shortcutListBuffer->writeUint(shortcutFlagsToWrite,
+ Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+}
+
+bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+ const int codePointCount, const int probability, const bool hasNext,
+ int *const shortcutEntryPos) {
+ BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
+ const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext);
+ if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags,
+ Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
+ AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos);
+ return false;
+ }
+ if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount,
+ true /* writesTerminator */, shortcutEntryPos)) {
+ AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
+ return false;
+ }
+ return true;
+}
+
+// Find a shortcut entry that has specified target and return its position.
+int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
+ const int *const targetCodePointsToFind, const int codePointCount) const {
+ bool hasNext = true;
+ int readingPos = shortcutListPos;
+ int targetCodePoints[MAX_WORD_LENGTH];
+ while (hasNext) {
+ const int entryPos = readingPos;
+ int probability = 0;
+ int targetCodePointCount = 0;
+ getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount,
+ &probability, &hasNext, &readingPos);
+ if (targetCodePointCount != codePointCount) {
+ continue;
+ }
+ bool matched = true;
+ for (int i = 0; i < codePointCount; ++i) {
+ if (targetCodePointsToFind[i] != targetCodePoints[i]) {
+ matched = false;
+ break;
+ }
+ }
+ if (matched) {
+ return entryPos;
+ }
+ }
+ return NOT_A_DICT_POS;
+}
+
+int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
+ const bool hasNext) const {
+ return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK)
+ | (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
new file mode 100644
index 000000000..eaafc27bc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_SHORTCUT_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+ ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
+ : SparseTableDictContent(dictPath,
+ Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+ Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+ Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+ ShortcutDictContent()
+ : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+ Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+ void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
+ const int shortcutEntryPos) {
+ int readingPos = shortcutEntryPos;
+ return getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
+ outCodePointCount, outProbability, outhasNext, &readingPos);
+ }
+
+ void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
+ bool *const outhasNext, int *const shortcutEntryPos) const;
+
+ // Returns head position of shortcut list for a PtNode specified by terminalId.
+ int getShortcutListHeadPos(const int terminalId) const;
+
+ bool flushToFile(const char *const dictPath) const;
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const ShortcutDictContent *const originalShortcutDictContent);
+
+ bool createNewShortcutList(const int terminalId);
+
+ bool copyShortcutList(const int shortcutListPos, const int toPos);
+
+ bool setProbability(const int probability, const int shortcutEntryPos);
+
+ bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
+ const int probability, const bool hasNext, const int shortcutEntryPos) {
+ int writingPos = shortcutEntryPos;
+ return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
+ hasNext, &writingPos);
+ }
+
+ bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
+ const int codePointCount, const int probability, const bool hasNext,
+ int *const shortcutEntryPos);
+
+ int findShortcutEntryAndGetPos(const int shortcutListPos,
+ const int *const targetCodePointsToFind, const int codePointCount) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
+
+ bool copyShortcutListFromDictContent(const int shortcutListPos,
+ const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
+
+ int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
+};
+} // namespace latinime
+#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
new file mode 100644
index 000000000..9064b7e72
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SINGLE_DICT_CONTENT_H
+#define LATINIME_SINGLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class SingleDictContent : public DictContent {
+ public:
+ SingleDictContent(const char *const dictPath, const char *const contentFileName,
+ const bool isUpdatable)
+ : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+ mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
+ mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mIsValid(mMmappedBuffer.get() != 0) {}
+
+ SingleDictContent()
+ : mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mIsValid(true) {}
+
+ virtual ~SingleDictContent() {}
+
+ virtual bool isValid() const {
+ return mIsValid;
+ }
+
+ bool isNearSizeLimit() const {
+ return mExpandableContentBuffer.isNearSizeLimit();
+ }
+
+ protected:
+ BufferWithExtendableBuffer *getWritableBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ const BufferWithExtendableBuffer *getBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
+ return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+ contentFileNameSuffix, &mExpandableContentBuffer);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
+
+ const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ const bool mIsValid;
+};
+} // namespace latinime
+#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
new file mode 100644
index 000000000..63c6ea3a4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+
+namespace latinime {
+
+bool SparseTableDictContent::flush(const char *const dictPath,
+ const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
+ const char *const contentFileNameSuffix) const {
+ if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix,
+ &mExpandableLookupTableBuffer)){
+ return false;
+ }
+ if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix,
+ &mExpandableAddressTableBuffer)) {
+ return false;
+ }
+ if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix,
+ &mExpandableContentBuffer)) {
+ return false;
+ }
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
new file mode 100644
index 000000000..a82e3f50a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+// TODO: Support multiple contents.
+class SparseTableDictContent : public DictContent {
+ public:
+ AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
+ const char *const lookupTableFileName, const char *const addressTableFileName,
+ const char *const contentFileName, const bool isUpdatable,
+ const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(
+ MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
+ mAddressTableBuffer(
+ MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
+ mContentBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
+ mExpandableLookupTableBuffer(
+ mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBuffer() : 0,
+ mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableAddressTableBuffer(
+ mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBuffer() : 0,
+ mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
+ mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize),
+ mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
+ && mContentBuffer.get() != 0) {}
+
+ SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
+ : mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0),
+ mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+ sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
+
+ virtual ~SparseTableDictContent() {}
+
+ virtual bool isValid() const {
+ return mIsValid;
+ }
+
+ bool isNearSizeLimit() const {
+ return mExpandableLookupTableBuffer.isNearSizeLimit()
+ || mExpandableAddressTableBuffer.isNearSizeLimit()
+ || mExpandableContentBuffer.isNearSizeLimit();
+ }
+
+ protected:
+ SparseTable *getUpdatableAddressLookupTable() {
+ return &mAddressLookupTable;
+ }
+
+ const SparseTable *getAddressLookupTable() const {
+ return &mAddressLookupTable;
+ }
+
+ BufferWithExtendableBuffer *getWritableContentBuffer() {
+ return &mExpandableContentBuffer;
+ }
+
+ const BufferWithExtendableBuffer *getContentBuffer() const {
+ return &mExpandableContentBuffer;
+ }
+
+ bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
+ const char *const addressTableFileName, const char *const contentFileName) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
+
+ const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
+ const MmappedBuffer::MmappedBufferPtr mContentBuffer;
+ BufferWithExtendableBuffer mExpandableLookupTableBuffer;
+ BufferWithExtendableBuffer mExpandableAddressTableBuffer;
+ BufferWithExtendableBuffer mExpandableContentBuffer;
+ SparseTable mAddressLookupTable;
+ const bool mIsValid;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
new file mode 100644
index 000000000..0b17a009d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
+ if (terminalId < 0 || terminalId >= mSize) {
+ return NOT_A_DICT_POS;
+ }
+ const int terminalPos = getBuffer()->readUint(
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+ return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
+ NOT_A_DICT_POS : terminalPos;
+}
+
+bool TerminalPositionLookupTable::setTerminalPtNodePosition(
+ const int terminalId, const int terminalPtNodePos) {
+ if (terminalId < 0) {
+ return NOT_A_DICT_POS;
+ }
+ while (terminalId >= mSize) {
+ // Write new entry.
+ if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
+ return false;
+ }
+ mSize++;
+ }
+ const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
+ terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
+ return getWritableBuffer()->writeUint(terminalPos,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
+}
+
+bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
+ // If the used buffer size is smaller than the actual buffer size, regenerate the lookup
+ // table and write the new table to the file.
+ if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
+ TerminalPositionLookupTable lookupTableToWrite;
+ for (int i = 0; i < mSize; ++i) {
+ const int terminalPtNodePosition = getTerminalPtNodePosition(i);
+ if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
+ AKLOGE("Cannot set terminal position to lookupTableToWrite."
+ " terminalId: %d, position: %d", i, terminalPtNodePosition);
+ return false;
+ }
+ }
+ return lookupTableToWrite.flush(dictPath,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ } else {
+ // We can simply use this lookup table because the buffer size has not been
+ // changed.
+ return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ }
+}
+
+bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
+ int removedEntryCount = 0;
+ int nextNewTerminalId = 0;
+ for (int i = 0; i < mSize; ++i) {
+ const int terminalPos = getBuffer()->readUint(
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
+ if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
+ // This entry is a garbage.
+ removedEntryCount++;
+ } else {
+ // Give a new terminal id to the entry.
+ if (!getWritableBuffer()->writeUint(terminalPos,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
+ getEntryPos(nextNewTerminalId))) {
+ return false;
+ }
+ // Memorize the mapping to the old terminal id to the new terminal id.
+ terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
+ nextNewTerminalId++;
+ }
+ }
+ mSize = nextNewTerminalId;
+ return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
new file mode 100644
index 000000000..f73e22754
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class TerminalPositionLookupTable : public SingleDictContent {
+ public:
+ typedef hash_map_compat<int, int> TerminalIdMap;
+
+ TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
+ : SingleDictContent(dictPath,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
+ mSize(getBuffer()->getTailPosition()
+ / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
+
+ TerminalPositionLookupTable() : mSize(0) {}
+
+ int getTerminalPtNodePosition(const int terminalId) const;
+
+ bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
+
+ int getNextTerminalId() const {
+ return mSize;
+ }
+
+ bool flushToFile(const char *const dictPath) const;
+
+ bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
+
+ int getEntryPos(const int terminalId) const {
+ return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ }
+
+ int mSize;
+};
+} // namespace latinime
+#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
new file mode 100644
index 000000000..918c02ba2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+
+#include <cerrno>
+#include <cstring>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
+ const char *const dictPath, const MmappedBuffer::MmappedBufferPtr &headerBuffer) {
+ const bool isUpdatable = headerBuffer.get() ? headerBuffer.get()->isUpdatable() : false;
+ // TODO: take only dictDirPath, and open both header and trie files in the constructor below
+ return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, headerBuffer, isUpdatable));
+}
+
+bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
+ const BufferWithExtendableBuffer *const headerBuffer) const {
+ // Create temporary directory.
+ const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
+ DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ char tmpDirPath[tmpDirPathBufSize];
+ FileUtils::getFilePathWithSuffix(dictDirPath,
+ DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
+ tmpDirPath);
+ if (FileUtils::existsDir(tmpDirPath)) {
+ if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
+ AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
+ ASSERT(false);
+ return false;
+ }
+ }
+ if (mkdir(tmpDirPath, S_IRWXU) == -1) {
+ AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
+ return false;
+ }
+ // Get dictionary base path.
+ const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
+ char dictName[dictNameBufSize];
+ FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
+ const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
+ char dictPath[dictPathBufSize];
+ FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
+
+ // Write header file.
+ if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+ Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
+ AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
+ Ver4DictConstants::HEADER_FILE_EXTENSION);
+ return false;
+ }
+ // Write trie file.
+ if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
+ Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
+ AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath,
+ Ver4DictConstants::TRIE_FILE_EXTENSION);
+ return false;
+ }
+ // Write dictionary contents.
+ if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
+ AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
+ return false;
+ }
+ if (!mProbabilityDictContent.flushToFile(dictPath)) {
+ AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
+ return false;
+ }
+ if (!mBigramDictContent.flushToFile(dictPath)) {
+ AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
+ return false;
+ }
+ if (!mShortcutDictContent.flushToFile(dictPath)) {
+ AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
+ return false;
+ }
+ // Remove existing dictionary.
+ if (!FileUtils::removeDirAndFiles(dictDirPath)) {
+ AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
+ ASSERT(false);
+ return false;
+ }
+ // Rename temporary directory.
+ if (rename(tmpDirPath, dictDirPath) != 0) {
+ AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
+ ASSERT(false);
+ return false;
+ }
+ return true;
+}
+
+Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
+ const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable)
+ : mHeaderBuffer(headerBuffer),
+ mDictBuffer(MmappedBuffer::openBuffer(dictPath,
+ Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
+ mHeaderPolicy(headerBuffer.get()->getBuffer(), FormatUtils::VERSION_4),
+ mExpandableHeaderBuffer(headerBuffer.get()->getBuffer(), mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mExpandableTrieBuffer(mDictBuffer.get()->getBuffer(),
+ mDictBuffer.get()->getBufferSize(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+ mTerminalPositionLookupTable(dictPath, isUpdatable),
+ mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
+ isUpdatable),
+ mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
+ isUpdatable),
+ mShortcutDictContent(dictPath, isUpdatable),
+ mIsUpdatable(isUpdatable) {}
+
+Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy)
+ : mHeaderBuffer(0), mDictBuffer(0), mHeaderPolicy(),
+ mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
+ mTerminalPositionLookupTable(),
+ mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+ mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
+ mIsUpdatable(true) {}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
new file mode 100644
index 000000000..a0c219e4d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_BUFFER_H
+#define LATINIME_VER4_DICT_BUFFER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class Ver4DictBuffers {
+ public:
+ typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
+
+ static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
+ const MmappedBuffer::MmappedBufferPtr &headerBuffer);
+
+ static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
+ const HeaderPolicy *const headerPolicy) {
+ return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy));
+ }
+
+ AK_FORCE_INLINE bool isValid() const {
+ return mDictBuffer.get() != 0 && mHeaderPolicy.isValid()
+ && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
+ && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
+ }
+
+ AK_FORCE_INLINE bool isNearSizeLimit() const {
+ return mExpandableTrieBuffer.isNearSizeLimit()
+ || mTerminalPositionLookupTable.isNearSizeLimit()
+ || mProbabilityDictContent.isNearSizeLimit()
+ || mBigramDictContent.isNearSizeLimit()
+ || mShortcutDictContent.isNearSizeLimit();
+ }
+
+ AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
+ return &mHeaderPolicy;
+ }
+
+ AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
+ return &mExpandableHeaderBuffer;
+ }
+
+ AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
+ return &mExpandableTrieBuffer;
+ }
+
+ AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
+ return &mExpandableTrieBuffer;
+ }
+
+ AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
+ return &mTerminalPositionLookupTable;
+ }
+
+ AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
+ return &mTerminalPositionLookupTable;
+ }
+
+ AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
+ return &mProbabilityDictContent;
+ }
+
+ AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
+ return &mProbabilityDictContent;
+ }
+
+ AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
+ return &mBigramDictContent;
+ }
+
+ AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
+ return &mBigramDictContent;
+ }
+
+ AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
+ return &mShortcutDictContent;
+ }
+
+ AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+ return &mShortcutDictContent;
+ }
+
+ AK_FORCE_INLINE bool isUpdatable() const {
+ return mIsUpdatable;
+ }
+
+ bool flush(const char *const dictDirPath) const {
+ return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
+ }
+
+ bool flushHeaderAndDictBuffers(const char *const dictDirPath,
+ const BufferWithExtendableBuffer *const headerBuffer) const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
+
+ Ver4DictBuffers(const char *const dictDirPath,
+ const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable);
+
+ Ver4DictBuffers(const HeaderPolicy *const headerPolicy);
+
+ const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
+ const MmappedBuffer::MmappedBufferPtr mDictBuffer;
+ const HeaderPolicy mHeaderPolicy;
+ BufferWithExtendableBuffer mExpandableHeaderBuffer;
+ BufferWithExtendableBuffer mExpandableTrieBuffer;
+ TerminalPositionLookupTable mTerminalPositionLookupTable;
+ ProbabilityDictContent mProbabilityDictContent;
+ BigramDictContent mBigramDictContent;
+ ShortcutDictContent mShortcutDictContent;
+ const int mIsUpdatable;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
new file mode 100644
index 000000000..34fecc25f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+// These values MUST match the definitions in FormatSpec.java.
+const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
+const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
+const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
+// tat = Terminal Address Table
+const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
+const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
+const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
+const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
+const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
+ ".shortcut_index_shortcut";
+
+// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
+// Extended region size, which is not GCed region size in dict file + additional buffer size, is
+// limited to 1MB to prevent from inefficient traversing.
+const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
+
+const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
+const int Ver4DictConstants::PROBABILITY_SIZE = 1;
+const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
+const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
+const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
+const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
+const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
+const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
+const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
+// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
+// invalid terminal ID in bigram lists.
+const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
+ (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
+const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
+const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
new file mode 100644
index 000000000..d6d22c5c1
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_DICT_CONSTANTS_H
+#define LATINIME_VER4_DICT_CONSTANTS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// TODO: Create PtConstants under the pt_common and move some constant values there.
+// Note that there are corresponding definitions in FormatSpec.java.
+class Ver4DictConstants {
+ public:
+ static const char *const TRIE_FILE_EXTENSION;
+ static const char *const HEADER_FILE_EXTENSION;
+ static const char *const FREQ_FILE_EXTENSION;
+ static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_FILE_EXTENSION;
+ static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_FILE_EXTENSION;
+ static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
+ static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
+
+ static const int MAX_DICTIONARY_SIZE;
+ static const int MAX_DICT_EXTENDED_REGION_SIZE;
+
+ static const int NOT_A_TERMINAL_ID;
+ static const int PROBABILITY_SIZE;
+ static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
+ static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ static const int NOT_A_TERMINAL_ADDRESS;
+ static const int TERMINAL_ID_FIELD_SIZE;
+ static const int TIME_STAMP_FIELD_SIZE;
+ static const int WORD_LEVEL_FIELD_SIZE;
+ static const int WORD_COUNT_FIELD_SIZE;
+
+ static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+ static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+
+ static const int BIGRAM_FLAGS_FIELD_SIZE;
+ static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+ static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
+ static const int BIGRAM_PROBABILITY_MASK;
+ static const int BIGRAM_HAS_NEXT_MASK;
+ // Used when bigram list has time stamp.
+ static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
+
+ static const int SHORTCUT_FLAGS_FIELD_SIZE;
+ static const int SHORTCUT_PROBABILITY_MASK;
+ static const int SHORTCUT_HAS_NEXT_MASK;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..17fc9483b
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int siblingNodePos) const {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ return PtNodeParams();
+ }
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ const int headPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ dictBuf, &pos);
+ const int parentPos =
+ DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
+ int codePoints[MAX_WORD_LENGTH];
+ const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
+ int terminalIdFieldPos = NOT_A_DICT_POS;
+ int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ int probability = NOT_A_PROBABILITY;
+ if (PatriciaTrieReadingUtils::isTerminal(flags)) {
+ terminalIdFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ terminalIdFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
+ const ProbabilityEntry probabilityEntry =
+ mProbabilityDictContent->getProbabilityEntry(terminalId);
+ if (probabilityEntry.hasHistoricalInfo()) {
+ probability = ForgettingCurveUtils::decodeProbability(
+ probabilityEntry.getHistoricalInfo());
+ } else {
+ probability = probabilityEntry.getProbability();
+ }
+ }
+ int childrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ childrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
+ childrenPos += mBuffer->getOriginalBufferSize();
+ }
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ // Sibling position is the tail position of original PtNode.
+ int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos;
+ // Read destination node if the read node is a moved node.
+ if (DynamicPtReadingUtils::isMoved(flags)) {
+ // The destination position is stored at the same place as the parent position.
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
+ } else {
+ return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
+ terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
+ newSiblingNodePos);
+ }
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
new file mode 100644
index 000000000..9d932457c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class ProbabilityDictContent;
+
+/*
+ * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
+ * node and reads node attributes including probability form probabilityBuffer.
+ */
+class Ver4PatriciaTrieNodeReader : public PtNodeReader {
+ public:
+ Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const ProbabilityDictContent *const probabilityDictContent)
+ : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
+
+ ~Ver4PatriciaTrieNodeReader() {}
+
+ virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
+ return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
+ NOT_A_DICT_POS /* siblingNodePos */);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
+
+ const BufferWithExtendableBuffer *const mBuffer;
+ const ProbabilityDictContent *const mProbabilityDictContent;
+
+ const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int siblingNodePos) const;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
new file mode 100644
index 000000000..32576cf0a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -0,0 +1,411 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
+ const PtNodeParams *const toBeUpdatedPtNodeParams) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
+ const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mTrieBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
+ true /* isDeleted */, false /* willBecomeNonTerminal */);
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
+ // Update flags.
+ if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ if (toBeUpdatedPtNodeParams->isTerminal()) {
+ // The PtNode is a terminal. Delete entry from the terminal position lookup table.
+ return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+ toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+ } else {
+ return true;
+ }
+}
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
+ const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mTrieBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
+ false /* isDeleted */, false /* willBecomeNonTerminal */);
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
+ // Update flags.
+ if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Update moved position, which is stored in the parent offset field.
+ if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
+ return false;
+ }
+ if (toBeUpdatedPtNodeParams->hasChildren()) {
+ // Update children's parent position.
+ mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
+ while (!mReadingHelper.isEnd()) {
+ const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
+ int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+ if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
+ &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up to update dictionary.
+ return false;
+ }
+ mReadingHelper.readNextSiblingNode(childPtNodeParams);
+ }
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams) {
+ int pos = toBeUpdatedPtNodeParams->getHeadPos();
+ const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mTrieBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
+ false /* isDeleted */, true /* willBecomeNonTerminal */);
+ if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+ toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) {
+ AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
+ toBeUpdatedPtNodeParams->getTerminalId());
+ return false;
+ }
+ // Update flags.
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
+ return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
+ &writingPos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
+ const int timestamp) {
+ if (!toBeUpdatedPtNodeParams->isTerminal()) {
+ return false;
+ }
+ const ProbabilityEntry originalProbabilityEntry =
+ mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ toBeUpdatedPtNodeParams->getTerminalId());
+ const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
+ newProbability, timestamp);
+ return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
+ if (!toBeUpdatedPtNodeParams->isTerminal()) {
+ AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
+ return false;
+ }
+ const ProbabilityEntry originalProbabilityEntry =
+ mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ toBeUpdatedPtNodeParams->getTerminalId());
+ if (originalProbabilityEntry.hasHistoricalInfo()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ originalProbabilityEntry.getHistoricalInfo());
+ const ProbabilityEntry probabilityEntry =
+ originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+ if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
+ AKLOGE("Cannot write updated probability entry. terminalId: %d",
+ toBeUpdatedPtNodeParams->getTerminalId());
+ return false;
+ }
+ const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo);
+ if (!isValid) {
+ if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
+ AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
+ return false;
+ }
+ }
+ *outNeedsToKeepPtNode = isValid;
+ } else {
+ // No need to update probability.
+ *outNeedsToKeepPtNode = true;
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
+ int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
+ return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+ newChildrenPosition, &childrenPosFieldPos);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newTerminalId) {
+ return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
+ toBeUpdatedPtNodeParams->getTerminalIdFieldPos());
+}
+
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
+ return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
+ ptNodeWritingPos);
+}
+
+
+bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
+ int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
+ ptNodeWritingPos)) {
+ return false;
+ }
+ // Write probability.
+ ProbabilityEntry newProbabilityEntry;
+ const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
+ &newProbabilityEntry, ptNodeParams->getProbability(), timestamp);
+ return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
+ &probabilityEntryToWrite);
+}
+
+bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
+ const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ bool *const outAddedNewBigram) {
+ if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
+ targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) {
+ AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
+ sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+ return false;
+ }
+ if (!sourcePtNodeParams->hasBigrams()) {
+ // Update has bigrams flag.
+ return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
+ sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
+ sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
+ true /* hasBigrams */,
+ sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
+ const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
+ return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
+ targetPtNodeParam->getTerminalId());
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
+ return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
+ sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
+}
+
+bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
+ const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount) {
+ int parentPos = toBeUpdatedPtNodeParams->getParentPos();
+ if (parentPos != NOT_A_DICT_POS) {
+ PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
+ dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
+ if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+ parentPos = it->second;
+ }
+ }
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
+ + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
+ // Write updated parent offset.
+ if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+ parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
+ return false;
+ }
+
+ // Updates children position.
+ int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
+ if (childrenPos != NOT_A_DICT_POS) {
+ PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
+ dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
+ if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+ childrenPos = it->second;
+ }
+ }
+ if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
+ return false;
+ }
+
+ // Counts bigram entries.
+ if (outBigramEntryCount) {
+ *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
+ toBeUpdatedPtNodeParams->getTerminalId());
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability) {
+ if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
+ targetCodePoints, targetCodePointCount, shortcutProbability)) {
+ AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
+ return false;
+ }
+ if (!ptNodeParams->hasShortcutTargets()) {
+ // Update has shortcut targets flag.
+ return updatePtNodeFlags(ptNodeParams->getHeadPos(),
+ ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+ ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
+ ptNodeParams->hasBigrams(),
+ ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
+ const PtNodeParams *const ptNodeParams) {
+ const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos(
+ ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
+ const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
+ ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
+ return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
+ ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
+ hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+ int *const ptNodeWritingPos) {
+ const int nodePos = *ptNodeWritingPos;
+ // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+ // PtNode writing.
+ if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
+ 0 /* nodeFlags */, ptNodeWritingPos)) {
+ return false;
+ }
+ // Calculate a parent offset and write the offset.
+ if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+ ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
+ return false;
+ }
+ // Write code points
+ if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
+ ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
+ return false;
+ }
+ int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ if (!ptNodeParams->willBecomeNonTerminal()) {
+ if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ terminalId = ptNodeParams->getTerminalId();
+ } else if (ptNodeParams->isTerminal()) {
+ // Write terminal information using a new terminal id.
+ // Get a new unused terminal id.
+ terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
+ }
+ }
+ const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+ if (isTerminal) {
+ // Update the lookup table.
+ if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+ terminalId, nodePos)) {
+ return false;
+ }
+ // Write terminal Id.
+ if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
+ Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
+ return false;
+ }
+ if (outTerminalId) {
+ *outTerminalId = terminalId;
+ }
+ }
+ // Write children position
+ if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
+ ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
+ return false;
+ }
+ return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
+ isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
+ ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+}
+
+const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
+ const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
+ const int timestamp) const {
+ // TODO: Consolidate historical info and probability.
+ if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
+ const HistoricalInfo updatedHistoricalInfo =
+ ForgettingCurveUtils::createUpdatedHistoricalInfo(
+ originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
+ return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
+ &updatedHistoricalInfo);
+ } else {
+ return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
+ }
+}
+
+bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
+ const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
+ const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
+ // Create node flags and write them.
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
+ hasShortcutTargets, hasBigrams, hasMultipleChars,
+ CHILDREN_POSITION_FIELD_SIZE);
+ if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
+ AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
+ return false;
+ }
+ return true;
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
new file mode 100644
index 000000000..69576d8e5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
+#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class Ver4BigramListPolicy;
+class Ver4DictBuffers;
+class Ver4ShortcutListPolicy;
+
+/*
+ * This class is used for helping to writes nodes of ver4 patricia trie.
+ */
+class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
+ public:
+ Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
+ Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+ Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
+ : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
+ mReadingHelper(mTrieBuffer, mPtNodeReader),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
+
+ virtual ~Ver4PatriciaTrieNodeWriter() {}
+
+ virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int movedPos, const int bigramLinkedNodePos);
+
+ virtual bool markPtNodeAsWillBecomeNonTerminal(
+ const PtNodeParams *const toBeUpdatedPtNodeParams);
+
+ virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newProbability, const int timestamp);
+
+ virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
+ const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
+
+ virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newChildrenPosition);
+
+ bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const int newTerminalId);
+
+ virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ int *const ptNodeWritingPos);
+
+ virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
+ const int timestamp, int *const ptNodeWritingPos);
+
+ virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ bool *const outAddedNewBigram);
+
+ virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
+ const PtNodeParams *const targetPtNodeParam);
+
+ virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
+ const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
+
+ virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
+ const DictPositionRelocationMap *const dictPositionRelocationMap,
+ int *const outBigramEntryCount);
+
+ virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
+ const int *const targetCodePoints, const int targetCodePointCount,
+ const int shortcutProbability);
+
+ bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
+
+ bool writePtNodeAndGetTerminalIdAndAdvancePosition(
+ const PtNodeParams *const ptNodeParams, int *const outTerminalId,
+ int *const ptNodeWritingPos);
+
+ // Create updated probability entry using given probability and timestamp. In addition to the
+ // probability, this method updates historical information if needed.
+ const ProbabilityEntry createUpdatedEntryFrom(
+ const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
+ const int timestamp) const;
+
+ bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
+ const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
+ const bool hasMultipleChars);
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ BufferWithExtendableBuffer *const mTrieBuffer;
+ Ver4DictBuffers *const mBuffers;
+ const Ver4PatriciaTrieNodeReader *const mPtNodeReader;
+ DynamicPtReadingHelper mReadingHelper;
+ Ver4BigramListPolicy *const mBigramPolicy;
+ Ver4ShortcutListPolicy *const mShortcutPolicy;
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
new file mode 100644
index 000000000..96bb8128e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
+
+#include <vector>
+
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/unigram_property.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
+// BinaryDictionaryDecayingTests.
+const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT =
+ "SET_CURRENT_TIME_FOR_TESTING:%d";
+const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
+const char *const Ver4PatriciaTriePolicy::QUIT_TIMEKEEPER_TEST_MODE_QUERY =
+ "QUIT_TIMEKEEPER_TEST_MODE";
+const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
+const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+ Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const {
+ if (!dicNode->hasChildren()) {
+ return;
+ }
+ DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
+ while (!readingHelper.isEnd()) {
+ const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
+ if (isTerminal && mHeaderPolicy->isDecayingDict()) {
+ // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
+ // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
+ // valid terminal DicNode.
+ isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
+ }
+ childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
+ ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
+ ptNodeParams.hasChildren(),
+ ptNodeParams.isBlacklisted()
+ || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
+ ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
+ readingHelper.readNextSiblingNode(ptNodeParams);
+ }
+}
+
+int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const {
+ DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodePos(ptNodePos);
+ return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
+ maxCodePointCount, outCodePoints, outUnigramProbability);
+}
+
+int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const {
+ DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
+}
+
+int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
+ const int bigramProbability) const {
+ if (mHeaderPolicy->isDecayingDict()) {
+ // Both probabilities are encoded. Decode them and get probability.
+ return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+ } else {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ // bigramProbability is a bigram probability delta.
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
+ }
+}
+
+int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_PROBABILITY;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
+ return NOT_A_PROBABILITY;
+ }
+ return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
+}
+
+int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
+ ptNodeParams.getTerminalId());
+}
+
+int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+ if (ptNodePos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
+ if (ptNodeParams.isDeleted()) {
+ return NOT_A_DICT_POS;
+ }
+ return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos(
+ ptNodeParams.getTerminalId());
+}
+
+bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
+ const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
+ const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
+ const int timestamp) {
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer->getTailPosition());
+ return false;
+ }
+ DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
+ readingHelper.initWithPtNodeArrayPos(getRootPosition());
+ bool addedNewUnigram = false;
+ if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
+ isBlacklisted, timestamp, &addedNewUnigram)) {
+ if (addedNewUnigram) {
+ mUnigramCount++;
+ }
+ if (shortcutLength > 0) {
+ // Add shortcut target.
+ const int wordPos = getTerminalPtNodePositionOfWord(word, length,
+ false /* forceLowerCaseSearch */);
+ if (wordPos == NOT_A_DICT_POS) {
+ AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
+ return false;
+ }
+ if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints,
+ shortcutLength, shortcutProbability)) {
+ AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d",
+ wordPos, shortcutLength, shortcutProbability);
+ return false;
+ }
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1, const int probability,
+ const int timestamp) {
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer->getTailPosition());
+ return false;
+ }
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ bool addedNewBigram = false;
+ if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
+ &addedNewBigram)) {
+ if (addedNewBigram) {
+ mBigramCount++;
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
+ const int *const word1, const int length1) {
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
+ mDictBuffer->getTailPosition());
+ return false;
+ }
+ const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
+ false /* forceLowerCaseSearch */);
+ if (word0Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
+ false /* forceLowerCaseSearch */);
+ if (word1Pos == NOT_A_DICT_POS) {
+ return false;
+ }
+ if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
+ mBigramCount--;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
+ return;
+ }
+ mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
+}
+
+void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return;
+ }
+ mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
+}
+
+bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+ if (mBuffers.get()->isNearSizeLimit()) {
+ // Additional buffer size is near the limit.
+ return true;
+ } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
+ > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
+ // Total extended region size of the trie exceeds the limit.
+ return true;
+ } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
+ && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
+ // Needs to reduce dictionary size.
+ return true;
+ } else if (mHeaderPolicy->isDecayingDict()) {
+ return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+ mHeaderPolicy);
+ }
+ return false;
+}
+
+void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
+ char *const outResult, const int maxResultLength) {
+ const int compareLength = queryLength + 1 /* terminator */;
+ int timestamp = NOT_A_TIMESTAMP;
+ if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+ } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d", mBigramCount);
+ } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d",
+ mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
+ static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
+ } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d",
+ mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
+ static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
+ } else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) {
+ TimeKeeper::startTestModeWithForceCurrentTime(timestamp);
+ } else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d", TimeKeeper::peekCurrentTime());
+ } else if (strncmp(query, QUIT_TIMEKEEPER_TEST_MODE_QUERY, compareLength) == 0) {
+ TimeKeeper::stopTestMode();
+ }
+}
+
+const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
+ const int codePointCount) const {
+ const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+ false /* forceLowerCaseSearch */);
+ if (ptNodePos == NOT_A_DICT_POS) {
+ AKLOGE("fetchUnigramProperty is called for invalid word.");
+ return UnigramProperty();
+ }
+ const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ const ProbabilityEntry probabilityEntry =
+ mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
+ ptNodeParams.getTerminalId());
+ const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
+ // Fetch shortcut information.
+ std::vector<std::vector<int> > shortcutTargets;
+ std::vector<int> shortcutProbabilities;
+ int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+ if (shortcutPos != NOT_A_DICT_POS) {
+ int shortcutTarget[MAX_WORD_LENGTH];
+ const ShortcutDictContent *const shortcutDictContent =
+ mBuffers.get()->getShortcutDictContent();
+ bool hasNext = true;
+ while (hasNext) {
+ int shortcutTargetLength = 0;
+ int shortcutProbability = NOT_A_PROBABILITY;
+ shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
+ &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
+ std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
+ shortcutTargets.push_back(target);
+ shortcutProbabilities.push_back(shortcutProbability);
+ }
+ }
+ return UnigramProperty(ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
+ ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
+ ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
+ historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+ historicalInfo->getCount(), &shortcutTargets, &shortcutProbabilities);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
new file mode 100644
index 000000000..8187b7a39
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+// TODO: Implement.
+class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+ public:
+ Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
+ : mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
+ mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
+ mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(),
+ mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
+ mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
+ mBuffers.get()->getTerminalPositionLookupTable()),
+ mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
+ mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
+ &mShortcutPolicy),
+ mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
+ mWritingHelper(mBuffers.get()),
+ mUnigramCount(mHeaderPolicy->getUnigramCount()),
+ mBigramCount(mHeaderPolicy->getBigramCount()) {};
+
+ AK_FORCE_INLINE int getRootPosition() const {
+ return 0;
+ }
+
+ void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ DicNodeVector *const childDicNodes) const;
+
+ int getCodePointsAndProbabilityAndReturnCodePointCount(
+ const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+ int *const outUnigramProbability) const;
+
+ int getTerminalPtNodePositionOfWord(const int *const inWord,
+ const int length, const bool forceLowerCaseSearch) const;
+
+ int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+ int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+ int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+ int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+ const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+ return mHeaderPolicy;
+ }
+
+ const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+ return &mBigramPolicy;
+ }
+
+ const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+ return &mShortcutPolicy;
+ }
+
+ bool addUnigramWord(const int *const word, const int length, const int probability,
+ const int *const shortcutTargetCodePoints, const int shortcutLength,
+ const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
+ const int timestamp);
+
+ bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1, const int probability, const int timestamp);
+
+ bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+ const int length1);
+
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ void getProperty(const char *const query, const int queryLength, char *const outResult,
+ const int maxResultLength);
+
+ const UnigramProperty getUnigramProperty(const int *const codePoints,
+ const int codePointCount) const;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
+
+ static const char *const UNIGRAM_COUNT_QUERY;
+ static const char *const BIGRAM_COUNT_QUERY;
+ static const char *const MAX_UNIGRAM_COUNT_QUERY;
+ static const char *const MAX_BIGRAM_COUNT_QUERY;
+ static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT;
+ static const char *const GET_CURRENT_TIME_QUERY;
+ static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY;
+ // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
+ // prevent the dictionary from overflowing.
+ static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+ static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+ Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
+ const HeaderPolicy *const mHeaderPolicy;
+ BufferWithExtendableBuffer *const mDictBuffer;
+ Ver4BigramListPolicy mBigramPolicy;
+ Ver4ShortcutListPolicy mShortcutPolicy;
+ Ver4PatriciaTrieNodeReader mNodeReader;
+ Ver4PatriciaTrieNodeWriter mNodeWriter;
+ DynamicPtUpdatingHelper mUpdatingHelper;
+ Ver4PatriciaTrieWritingHelper mWritingHelper;
+ int mUnigramCount;
+ int mBigramCount;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
new file mode 100644
index 000000000..254022db4
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
+ const uint8_t *const buffer, int *pos) {
+ return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
new file mode 100644
index 000000000..e418c4933
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
+
+#include <stdint.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+
+class Ver4PatriciaTrieReadingUtils {
+ public:
+ static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..43227635c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
+
+#include <cstring>
+#include <queue>
+
+#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
+ const int unigramCount, const int bigramCount) const {
+ const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
+ false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
+ AKLOGE("Cannot write header structure to buffer. updatesLastUpdatedTime: %d, "
+ "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
+ "extendedRegionSize: %d", false, false, unigramCount, bigramCount,
+ extendedRegionSize);
+ return;
+ }
+ mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
+}
+
+void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+ const char *const dictDirPath) {
+ const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
+ Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
+ Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
+ int unigramCount = 0;
+ int bigramCount = 0;
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
+ return;
+ }
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
+ true /* updatesLastDecayedTime */, unigramCount, bigramCount,
+ 0 /* extendedRegionSize */)) {
+ return;
+ }
+ dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
+}
+
+bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
+ Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
+ mBuffers->getProbabilityDictContent());
+ Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
+ mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
+
+ DynamicPtReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ &ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount();
+ if (headerPolicy->isDecayingDict()
+ && unigramCount > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter,
+ ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC)) {
+ AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
+ ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC);
+ return false;
+ }
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(&ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
+ if (headerPolicy->isDecayingDict()
+ && bigramCount > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ if (!truncateBigrams(ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC)) {
+ AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount,
+ ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC);
+ return false;
+ }
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
+ DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
+ buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instances for the GCed dictionary.
+ Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
+ buffersToWrite->getProbabilityDictContent());
+ Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
+ // Re-assign terminal IDs for valid terminal PtNodes.
+ TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
+ if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
+ &terminalIdMap)) {
+ return false;
+ }
+ // Run GC for probability dict content.
+ if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
+ mBuffers->getProbabilityDictContent())) {
+ return false;
+ }
+ // Run GC for bigram dict content.
+ if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
+ mBuffers->getBigramDictContent(), outBigramCount)) {
+ return false;
+ }
+ // Run GC for shortcut dict content.
+ if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
+ mBuffers->getShortcutDictContent())) {
+ return false;
+ }
+ DynamicPtReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
+ &newPtNodeReader);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
+ return false;
+ }
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ return true;
+}
+
+bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
+ const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+ Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
+ const TerminalPositionLookupTable *const terminalPosLookupTable =
+ mBuffers->getTerminalPositionLookupTable();
+ const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
+ std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+ priorityQueue;
+ for (int i = 0; i < nextTerminalId; ++i) {
+ const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
+ if (terminalPos == NOT_A_DICT_POS) {
+ continue;
+ }
+ const ProbabilityEntry probabilityEntry =
+ mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
+ const int probability = probabilityEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo()) :
+ probabilityEntry.getProbability();
+ priorityQueue.push(DictProbability(terminalPos, probability,
+ probabilityEntry.getHistoricalInfo()->getTimeStamp()));
+ }
+
+ // Delete unigrams.
+ while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
+ const int ptNodePos = priorityQueue.top().getDictPos();
+ const PtNodeParams ptNodeParams =
+ ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+ if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
+ AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
+ return false;
+ }
+ priorityQueue.pop();
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
+ const TerminalPositionLookupTable *const terminalPosLookupTable =
+ mBuffers->getTerminalPositionLookupTable();
+ const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
+ std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+ priorityQueue;
+ BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
+ for (int i = 0; i < nextTerminalId; ++i) {
+ const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ continue;
+ }
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!bigramEntry.isValid()) {
+ continue;
+ }
+ const int probability = bigramEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
+ bigramEntry.getProbability();
+ priorityQueue.push(DictProbability(entryPos, probability,
+ bigramEntry.getHistoricalInfo()->getTimeStamp()));
+ }
+ }
+
+ // Delete bigrams.
+ while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
+ const int entryPos = priorityQueue.top().getDictPos();
+ const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
+ const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
+ AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
+ return false;
+ }
+ priorityQueue.pop();
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (!ptNodeParams->isTerminal()) {
+ return true;
+ }
+ TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+ mTerminalIdMap->find(ptNodeParams->getTerminalId());
+ if (it == mTerminalIdMap->end()) {
+ AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
+ ptNodeParams->getTerminalId(), mTerminalIdMap->size());
+ return false;
+ }
+ if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
+ AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
+ }
+ return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..c3a155e0e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class HeaderPolicy;
+class Ver4DictBuffers;
+class Ver4PatriciaTrieNodeReader;
+class Ver4PatriciaTrieNodeWriter;
+
+class Ver4PatriciaTrieWritingHelper {
+ public:
+ Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
+ : mBuffers(buffers) {}
+
+ void writeToDictFile(const char *const dictDirPath, const int unigramCount,
+ const int bigramCount) const;
+
+ void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
+
+ class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ : public DynamicPtReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
+ Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
+ : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
+
+ Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
+ const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
+ };
+
+ // For truncateUnigrams() and truncateBigrams().
+ class DictProbability {
+ public:
+ DictProbability(const int dictPos, const int probability, const int timestamp)
+ : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
+
+ int getDictPos() const {
+ return mDictPos;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ int getTimestamp() const {
+ return mTimestamp;
+ }
+
+ private:
+ DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
+
+ int mDictPos;
+ int mProbability;
+ int mTimestamp;
+ };
+
+ // For truncateUnigrams() and truncateBigrams().
+ class DictProbabilityComparator {
+ public:
+ bool operator()(const DictProbability &left, const DictProbability &right) {
+ if (left.getProbability() != right.getProbability()) {
+ return left.getProbability() > right.getProbability();
+ }
+ if (left.getTimestamp() != right.getTimestamp()) {
+ return left.getTimestamp() < right.getTimestamp();
+ }
+ return left.getDictPos() > right.getDictPos();
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
+ };
+
+ bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+ Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
+ int *const outBigramCount);
+
+ bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+ Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
+
+ bool truncateBigrams(const int maxBigramCount);
+
+ Ver4DictBuffers *const mBuffers;
+};
+} // namespace latinime
+
+#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f692882f2..259dae4c6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,11 +18,42 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
+uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
+ const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
+ const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
+ return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
+}
+
+uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
+ int *const pos) const {
+ const int value = readUint(size, *pos);
+ *pos += size;
+ return value;
+}
+
+void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoints, int *outCodePointCount, int *const pos) const {
+ const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
+ if (readingPosIsInAdditionalBuffer) {
+ *pos -= mOriginalBufferSize;
+ }
+ *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
+ getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
+ if (readingPosIsInAdditionalBuffer) {
+ *pos += mOriginalBufferSize;
+ }
+}
+
+bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
+ int writingPos = pos;
+ return writeUintAndAdvancePosition(data, size, &writingPos);
+}
+
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
@@ -46,7 +77,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
}
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
- const int codePointCount, const bool writesTerminator ,int *const pos) {
+ const int codePointCount, const bool writesTerminator, int *const pos) {
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
codePoints, codePointCount, writesTerminator);
if (!checkAndPrepareWriting(*pos, size)) {
@@ -100,4 +131,21 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return true;
}
+bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) {
+ int copyingPos = 0;
+ const int tailPos = sourceBuffer->getTailPosition();
+ const int maxDataChunkSize = sizeof(uint32_t);
+ while (copyingPos < tailPos) {
+ const int remainingSize = tailPos - copyingPos;
+ const int copyingSize = (remainingSize >= maxDataChunkSize) ?
+ maxDataChunkSize : remainingSize;
+ const uint32_t data = sourceBuffer->readUint(copyingSize, copyingPos);
+ if (!writeUint(data, copyingSize, copyingPos)) {
+ return false;
+ }
+ copyingPos += copyingSize;
+ }
+ return true;
+}
+
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 9dc34823c..76be16518 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,12 +32,20 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
+ static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
+
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
- const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
+ const int maxAdditionalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+ // Without original buffer.
+ BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
+ : mOriginalBuffer(0), mOriginalBufferSize(0),
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
@@ -63,6 +71,13 @@ class BufferWithExtendableBuffer {
}
}
+ uint32_t readUint(const int size, const int pos) const;
+
+ uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
+
+ void readCodePointsAndAdvancePosition(const int maxCodePointCount,
+ int *const outCodePoints, int *outCodePointCount, int *const pos) const;
+
AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize;
}
@@ -78,15 +93,18 @@ class BufferWithExtendableBuffer {
* Writing is allowed for original buffer, already written region of additional buffer and the
* tail of additional buffer.
*/
+ bool writeUint(const uint32_t data, const int size, const int pos);
+
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
const bool writesTerminator, int *const pos);
+ bool copy(const BufferWithExtendableBuffer *const sourceBuffer);
+
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 0c1576818..ebdd523e1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -114,6 +114,24 @@ class ByteArrayUtils {
return buffer[(*pos)++];
}
+ static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+ const int size, const int pos) {
+ // size must be in 1 to 4.
+ ASSERT(size >= 1 && size <= 4);
+ switch (size) {
+ case 1:
+ return ByteArrayUtils::readUint8(buffer, pos);
+ case 2:
+ return ByteArrayUtils::readUint16(buffer, pos);
+ case 3:
+ return ByteArrayUtils::readUint24(buffer, pos);
+ case 4:
+ return ByteArrayUtils::readUint32(buffer, pos);
+ default:
+ return 0;
+ }
+ }
+
/**
* Code Point Reading
*
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 994826fa8..442373b29 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -17,12 +17,14 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include <cstdio>
-#include <cstring>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
@@ -30,60 +32,81 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
+ TimeKeeper::setCurrentTime();
switch (dictVersion) {
- case 3:
- return createEmptyV3DictFile(filePath, attributeMap);
+ case FormatUtils::VERSION_4:
+ return createEmptyV4DictFile(filePath, attributeMap);
default:
- // Only version 3 dictionary is supported for now.
+ AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
+ filePath, dictVersion);
return false;
}
}
-/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
+/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
- headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
- 0 /* extendedRegionSize */);
- BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
+ HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
+ Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
+ Ver4DictBuffers::createVer4DictBuffers(&headerPolicy);
+ headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(),
+ true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */,
+ 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
+ if (!DynamicPtWritingUtils::writeEmptyDictionary(
+ dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) {
+ AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
return false;
}
- return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
+ return dictBuffers.get()->flush(dirPath);
}
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
- const int tmpFileNameBufSize = strlen(filePath)
- + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
+ const int tmpFileNameBufSize = FileUtils::getFilePathWithSuffixBufSize(filePath,
+ TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
// Name of a temporary file used for writing that is a connected string of original name and
// TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
char tmpFileName[tmpFileNameBufSize];
- snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath,
- TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
- FILE *const file = fopen(tmpFileName, "wb");
- if (!file) {
- AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
- ASSERT(false);
+ FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE,
+ tmpFileNameBufSize, tmpFileName);
+ if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictHeader)) {
+ AKLOGE("Dictionary header cannot be written to %s.", tmpFileName);
+ return false;
+ }
+ if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictBody)) {
+ AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName);
+ return false;
+ }
+ if (rename(tmpFileName, filePath) != 0) {
+ AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);;
return false;
}
- // Write the dictionary header.
- if (!writeBufferToFile(file, dictHeader)) {
- remove(tmpFileName);
- AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
+ return true;
+}
+
+/* static */ bool DictFileWritingUtils::flushBufferToFileWithSuffix(const char *const basePath,
+ const char *const suffix, const BufferWithExtendableBuffer *const buffer) {
+ const int filePathBufSize = FileUtils::getFilePathWithSuffixBufSize(basePath, suffix);
+ char filePath[filePathBufSize];
+ FileUtils::getFilePathWithSuffix(basePath, suffix, filePathBufSize, filePath);
+ return flushBufferToFile(filePath, buffer);
+}
+
+/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
+ const BufferWithExtendableBuffer *const buffer) {
+ FILE *const file = fopen(filePath, "wb");
+ if (!file) {
+ AKLOGE("File %s cannot be opened.", filePath);
ASSERT(false);
return false;
}
- // Write the dictionary body.
- if (!writeBufferToFile(file, dictBody)) {
- remove(tmpFileName);
- AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
+ if (!writeBufferToFile(file, buffer)) {
+ remove(filePath);
+ AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
+ buffer->getTailPosition());
ASSERT(false);
return false;
}
fclose(file);
- rename(tmpFileName, filePath);
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index bd4ac66fd..bdf9fd63c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -28,6 +28,8 @@ class BufferWithExtendableBuffer;
class DictFileWritingUtils {
public:
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+
static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
@@ -35,14 +37,18 @@ class DictFileWritingUtils {
BufferWithExtendableBuffer *const dictHeader,
BufferWithExtendableBuffer *const dictBody);
+ static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix,
+ const BufferWithExtendableBuffer *const buffer);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
- static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
-
- static bool createEmptyV3DictFile(const char *const filePath,
+ static bool createEmptyV4DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+ static bool flushBufferToFile(const char *const filePath,
+ const BufferWithExtendableBuffer *const buffer);
+
static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp
new file mode 100644
index 000000000..1f25cfa1e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+#include <cstdio>
+#include <cstring>
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace latinime {
+
+// Returns -1 on error.
+/* static */ int FileUtils::getFileSize(const char *const filePath) {
+ const int fd = open(filePath, O_RDONLY);
+ if (fd == -1) {
+ return -1;
+ }
+ struct stat statBuf;
+ if (fstat(fd, &statBuf) != 0) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return static_cast<int>(statBuf.st_size);
+}
+
+/* static */ bool FileUtils::existsDir(const char *const dirPath) {
+ DIR *const dir = opendir(dirPath);
+ if (dir == NULL) {
+ return false;
+ }
+ closedir(dir);
+ return true;
+}
+
+// Remove a directory and all files in the directory.
+/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) {
+ DIR *const dir = opendir(dirPath);
+ if (dir == NULL) {
+ AKLOGE("Cannot open dir %s.", dirPath);
+ return true;
+ }
+ struct dirent *dirent;
+ while ((dirent = readdir(dir)) != NULL) {
+ if (dirent->d_type != DT_REG) {
+ continue;
+ }
+ const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name);
+ char filePath[filePathBufSize];
+ getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath);
+ if (remove(filePath) != 0) {
+ AKLOGE("Cannot remove file %s.", filePath);
+ closedir(dir);
+ return false;
+ }
+ }
+ closedir(dir);
+ if (remove(dirPath) != 0) {
+ AKLOGE("Cannot remove directory %s.", dirPath);
+ return false;
+ }
+ return true;
+}
+
+/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath,
+ const char *const suffix) {
+ return strlen(filePath) + strlen(suffix) + 1 /* terminator */;
+}
+
+/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath,
+ const char *const suffix, const int filePathBufSize, char *const outFilePath) {
+ snprintf(outFilePath, filePathBufSize, "%s%s", filePath, suffix);
+}
+
+/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath,
+ const char *const fileName) {
+ return strlen(dirPath) + 1 /* '/' */ + strlen(fileName) + 1 /* terminator */;
+}
+
+/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName,
+ const int filePathBufSize, char *const outFilePath) {
+ snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName);
+}
+
+/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath,
+ const char *const suffix, const int outDirPathBufSize, char *const outDirPath) {
+ const int filePathLength = strlen(filePath);
+ const int suffixLength = strlen(suffix);
+ if (filePathLength <= suffixLength) {
+ AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).",
+ filePath, filePathLength, suffix, suffixLength);
+ return false;
+ }
+ const int resultFilePathLength = filePathLength - suffixLength;
+ if (outDirPathBufSize <= resultFilePathLength) {
+ AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d",
+ filePath, suffix, outDirPathBufSize);
+ return false;
+ }
+ if (strncmp(filePath + resultFilePathLength, suffix, suffixLength) != 0) {
+ AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix);
+ return false;
+ }
+ snprintf(outDirPath, resultFilePathLength + 1 /* terminator */, "%s", filePath);
+ return true;
+}
+
+/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize,
+ char *const outDirPath) {
+ for (int i = strlen(filePath) - 1; i >= 0; --i) {
+ if (filePath[i] == '/') {
+ if (i >= outDirPathBufSize) {
+ AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d",
+ filePath, outDirPathBufSize);
+ ASSERT(false);
+ return;
+ }
+ snprintf(outDirPath, i + 1 /* terminator */, "%s", filePath);
+ return;
+ }
+ }
+}
+
+/* static */ void FileUtils::getBasename(const char *const filePath,
+ const int outNameBufSize, char *const outName) {
+ const int filePathBufSize = strlen(filePath) + 1 /* terminator */;
+ char filePathBuf[filePathBufSize];
+ snprintf(filePathBuf, filePathBufSize, "%s", filePath);
+ const char *const baseName = basename(filePathBuf);
+ const int baseNameLength = strlen(baseName);
+ if (baseNameLength >= outNameBufSize) {
+ AKLOGE("outNameBufSize is too small. filePath: %s, outNameBufSize: %d",
+ filePath, outNameBufSize);
+ return;
+ }
+ snprintf(outName, baseNameLength + 1 /* terminator */, "%s", baseName);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
new file mode 100644
index 000000000..3e84a3038
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FILE_UTILS_H
+#define LATINIME_FILE_UTILS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class FileUtils {
+ public:
+ // Returns -1 on error.
+ static int getFileSize(const char *const filePath);
+
+ static bool existsDir(const char *const dirPath);
+
+ // Remove a directory and all files in the directory.
+ static bool removeDirAndFiles(const char *const dirPath);
+
+ static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix);
+
+ static void getFilePathWithSuffix(const char *const filePath, const char *const suffix,
+ const int filePathBufSize, char *const outFilePath);
+
+ static int getFilePathBufSize(const char *const dirPath, const char *const fileName);
+
+ static void getFilePath(const char *const dirPath, const char *const fileName,
+ const int filePathBufSize, char *const outFilePath);
+
+ // Returns whether the filePath have the suffix.
+ static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix,
+ const int dirPathBufSize, char *const outDirPath);
+
+ static void getDirPath(const char *const filePath, const int dirPathBufSize,
+ char *const outDirPath);
+
+ static void getBasename(const char *const filePath, const int outNameBufSize,
+ char *const outName);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_FILE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 1632fd072..4050ad363 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,14 +14,14 @@
* limitations under the License.
*/
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
#include <cmath>
-#include <ctime>
#include <stdlib.h>
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+#include "utils/time_keeper.h"
namespace latinime {
@@ -31,76 +31,86 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
-const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
-const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
-const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
-// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
-// duration of the decay is approximately 66hours.
-const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
-const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
-ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
+const int ForgettingCurveUtils::MAX_LEVEL = 3;
+const int ForgettingCurveUtils::MAX_COUNT = 3;
+const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
+const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
+const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
+const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
-void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
- mCurrentTime = time(0);
-}
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
-/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbability) {
- if (encodedUnigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
- return backoff(decodeProbability(encodedUnigramProbability));
+/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
+ const HistoricalInfo *const originalHistoricalInfo,
+ const int newProbability, const int timestamp) {
+ if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
+ return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
+ } else if (!originalHistoricalInfo->isValid()) {
+ // Initial information.
+ return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
} else {
- const int unigramProbability = decodeProbability(encodedUnigramProbability);
- const int bigramProbability = decodeProbability(encodedBigramProbability);
- return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
+ const int updatedCount = originalHistoricalInfo->getCount() + 1;
+ if (updatedCount > MAX_COUNT) {
+ // The count exceeds the max value the level can be incremented.
+ if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
+ // The level is already max.
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(),
+ originalHistoricalInfo->getCount());
+ } else {
+ // Level up.
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1,
+ 0 /* count */);
+ }
+ } else {
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount);
+ }
}
}
-// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
-// (i.e. unigram probability + bigram probability delta).
-/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
- const int originalEncodedProbability, const int newProbability) {
- if (originalEncodedProbability == NOT_A_PROBABILITY) {
- // The bigram relation is not in this dictionary.
- if (newProbability == NOT_A_PROBABILITY) {
- // The bigram target is not in other dictionaries.
- return 0;
- } else {
- return MIN_VALID_ENCODED_PROBABILITY;
- }
+/* static */ int ForgettingCurveUtils::decodeProbability(
+ const HistoricalInfo *const historicalInfo) {
+ const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
+ return sProbabilityTable.getProbability(historicalInfo->getLevel(),
+ min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
+}
+
+/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
+ const int bigramProbability) {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY);
} else {
- if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
- return MIN_VALID_ENCODED_PROBABILITY;
- }
- return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
+ return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
-/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
- return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
+/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
+ return historicalInfo->getLevel() > 0
+ || getElapsedTimeStepCount(historicalInfo->getTimeStamp())
+ < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
-/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
- const DictionaryHeaderStructurePolicy *const headerPolicy) {
- const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
- const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
- int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
- // TODO: Implement the decay in more proper way.
- for (int i = 0; i < decayIterationCount; ++i) {
- const float currentRate = static_cast<float>(currentEncodedProbability)
- / static_cast<float>(MAX_ENCODED_PROBABILITY);
- const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
- const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
- if (thresholdToDecay < randValue) {
- currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
- 0);
- }
+/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
+ const HistoricalInfo *const originalHistoricalInfo) {
+ if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
+ return HistoricalInfo();
}
- return currentEncodedProbability;
+ const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp());
+ if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
+ // No need to update historical info.
+ return *originalHistoricalInfo;
+ }
+ // Level down.
+ const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
+ const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
+ originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
+ const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() +
+ levelDownAmount * (MAX_ELAPSED_TIME_STEP_COUNT + 1) * TIME_STEP_DURATION_IN_SECONDS;
+ return HistoricalInfo(adjustedTimestamp,
+ originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
}
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
@@ -116,21 +126,14 @@ void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
if (mindsBlockByDecay) {
return false;
}
- if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
+ if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS
+ < TimeKeeper::peekCurrentTime()) {
// Time to decay.
return true;
}
return false;
}
-/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
- if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else {
- return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
- }
-}
-
// See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) {
@@ -140,15 +143,29 @@ void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
}
}
+/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) {
+ return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
+}
+
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
- // Table entry is as follows:
- // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
- // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
- mTable.resize(MAX_ENCODED_PROBABILITY + 1);
- for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
- const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
- static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
- mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+ mTable.resize(MAX_LEVEL + 1);
+ for (int level = 0; level <= MAX_LEVEL; ++level) {
+ mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
+ const float initialProbability =
+ static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
+ for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) {
+ if (level == 0) {
+ mTable[level][timeStepCount] = NOT_A_PROBABILITY;
+ continue;
+ }
+ const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
+ const float probability = initialProbability
+ * powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
+ / static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
+ * (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
+ mTable[level][timeStepCount] =
+ min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
+ }
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 2ad423874..6ac8dc528 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -20,45 +20,33 @@
#include <vector>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
class DictionaryHeaderStructurePolicy;
-// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
-// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
- class TimeKeeper {
- public:
- TimeKeeper() : mCurrentTime(0) {}
- void setCurrentTime();
- int peekCurrentTime() const { return mCurrentTime; };
-
- private:
- DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
-
- int mCurrentTime;
- };
-
static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
- static TimeKeeper sTimeKeeper;
+ static const HistoricalInfo createUpdatedHistoricalInfo(
+ const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+ const int timestamp);
- static int getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbability);
+ static const HistoricalInfo createHistoricalInfoToSave(
+ const HistoricalInfo *const originalHistoricalInfo);
- static int getUpdatedEncodedProbability(const int originalEncodedProbability,
- const int newProbability);
+ static int decodeProbability(const HistoricalInfo *const historicalInfo);
- static int isValidEncodedProbability(const int encodedProbability);
+ static int getProbability(const int encodedUnigramProbability,
+ const int encodedBigramProbability);
- static int getEncodedProbabilityToSave(const int encodedProbability,
- const DictionaryHeaderStructurePolicy *const headerPolicy);
+ static bool needsToKeep(const HistoricalInfo *const historicalInfo);
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
@@ -70,31 +58,32 @@ class ForgettingCurveUtils {
public:
ProbabilityTable();
- int getProbability(const int encodedProbability) const {
- if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
- return NOT_A_PROBABILITY;
- }
- return mTable[encodedProbability];
+ int getProbability(const int level, const int elapsedTimeStepCount) const {
+ return mTable[level][elapsedTimeStepCount];
}
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
- std::vector<int> mTable;
+ std::vector<std::vector<int> > mTable;
};
static const int MAX_COMPUTED_PROBABILITY;
- static const int MAX_ENCODED_PROBABILITY;
- static const int MIN_VALID_ENCODED_PROBABILITY;
- static const int ENCODED_PROBABILITY_STEP;
- static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS;
- static const ProbabilityTable sProbabilityTable;
+ static const int MAX_LEVEL;
+ static const int MAX_COUNT;
+ static const int MIN_VALID_LEVEL;
+ static const int TIME_STEP_DURATION_IN_SECONDS;
+ static const int MAX_ELAPSED_TIME_STEP_COUNT;
+ static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+ static const int HALF_LIFE_TIME_IN_SECONDS;
- static int decodeProbability(const int encodedProbability);
+ static const ProbabilityTable sProbabilityTable;
static int backoff(const int unigramProbability);
+
+ static int getElapsedTimeStepCount(const int timestamp);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 1d77d5c27..cd3c403fa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -41,10 +41,13 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// Dictionary format version number (2 bytes)
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
- if (ByteArrayUtils::readUint16(dict, 4) == 2) {
+ // Conceptually this converts the hardcoded value of the bytes in the file into
+ // the symbolic value we use in the code. But we want the constants to be the
+ // same so we use them for both here.
+ if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
return VERSION_2;
- } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
- return VERSION_3;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
+ return VERSION_4;
} else {
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 79ed0de29..eb2227d60 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -29,9 +29,10 @@ namespace latinime {
class FormatUtils {
public:
enum FORMAT_VERSION {
- VERSION_2,
- VERSION_3,
- UNKNOWN_VERSION
+ // These MUST have the same values as the relevant constants in FormatSpec.java.
+ VERSION_2 = 2,
+ VERSION_4 = 400,
+ UNKNOWN_VERSION = -1
};
// 32 bit magic number is stored at the beginning of the dictionary header to reject
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
new file mode 100644
index 000000000..428ca8626
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_HISTORICAL_INFO_H
+#define LATINIME_HISTORICAL_INFO_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class HistoricalInfo {
+ public:
+ // Invalid historical info.
+ HistoricalInfo()
+ : mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0) {}
+
+ HistoricalInfo(const int timestamp, const int level, const int count)
+ : mTimestamp(timestamp), mLevel(level), mCount(count) {}
+
+ bool isValid() const {
+ return mTimestamp != NOT_A_TIMESTAMP;
+ }
+
+ int getTimeStamp() const {
+ return mTimestamp;
+ }
+
+ int getLevel() const {
+ return mLevel;
+ }
+
+ int getCount() const {
+ return mCount;
+ }
+
+ private:
+ // Copy constructor is public to use this class as a type of return value.
+ DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
+
+ const int mTimestamp;
+ const int mLevel;
+ const int mCount;
+};
+} // namespace latinime
+#endif /* LATINIME_HISTORICAL_INFO_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
new file mode 100644
index 000000000..e88d6e0a9
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+
+namespace latinime {
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const path, const int bufferOffset, const int bufferSize,
+ const bool isUpdatable) {
+ const int mmapFd = open(path, O_RDONLY);
+ if (mmapFd < 0) {
+ AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+ return MmappedBufferPtr(0);
+ }
+ const int pagesize = sysconf(_SC_PAGESIZE);
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
+ const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ close(mmapFd);
+ return MmappedBufferPtr(0);
+ }
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+ if (!buffer) {
+ AKLOGE("DICT: buffer is null");
+ close(mmapFd);
+ return MmappedBufferPtr(0);
+ }
+ return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize,
+ mmapFd, isUpdatable));
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const path, const bool isUpdatable) {
+ const int fileSize = FileUtils::getFileSize(path);
+ if (fileSize == -1) {
+ return MmappedBufferPtr(0);
+ } else if (fileSize == 0) {
+ return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
+ } else {
+ return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
+ }
+}
+
+/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
+ const char *const dirPath, const char *const fileName, const bool isUpdatable) {
+ const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
+ char filePath[filePathBufferSize];
+ const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
+ fileName);
+ if (filePathLength >= filePathBufferSize) {
+ return 0;
+ }
+ return openBuffer(filePath, isUpdatable);
+}
+
+MmappedBuffer::~MmappedBuffer() {
+ if (mAlignedSize == 0) {
+ return;
+ }
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ }
+ ret = close(mMmapFd);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6b69116eb..73a733b0c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -17,58 +17,27 @@
#ifndef LATINIME_MMAPPED_BUFFER_H
#define LATINIME_MMAPPED_BUFFER_H
-#include <cerrno>
-#include <fcntl.h>
#include <stdint.h>
-#include <sys/mman.h>
-#include <unistd.h>
#include "defines.h"
+#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
- const int bufferSize, const bool isUpdatable) {
- const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
- const int mmapFd = open(path, openMode);
- if (mmapFd < 0) {
- AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
- return 0;
- }
- const int pagesize = getpagesize();
- const int offset = bufferOffset % pagesize;
- int alignedOffset = bufferOffset - offset;
- int alignedSize = bufferSize + offset;
- const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
- void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
- alignedOffset);
- if (mmappedBuffer == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
- close(mmapFd);
- return 0;
- }
- uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
- if (!buffer) {
- AKLOGE("DICT: buffer is null");
- close(mmapFd);
- return 0;
- }
- return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
- isUpdatable);
- }
+ typedef ExclusiveOwnershipPointer<MmappedBuffer> MmappedBufferPtr;
- ~MmappedBuffer() {
- int ret = munmap(mMmappedBuffer, mAlignedSize);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
- }
- ret = close(mMmapFd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
- }
- }
+ static MmappedBufferPtr openBuffer(const char *const path,
+ const int bufferOffset, const int bufferSize, const bool isUpdatable);
+
+ // Mmap entire file.
+ static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable);
+
+ static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName,
+ const bool isUpdatable);
+
+ ~MmappedBuffer();
AK_FORCE_INLINE uint8_t *getBuffer() const {
return mBuffer;
@@ -89,6 +58,11 @@ class MmappedBuffer {
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+ // Empty file. We have to handle an empty file as a valid part of a dictionary.
+ AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
+ : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0),
+ mIsUpdatable(isUpdatable) {}
+
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
new file mode 100644
index 000000000..4ad82f9f7
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+const int SparseTable::NOT_EXIST = -1;
+const int SparseTable::INDEX_SIZE = 4;
+
+bool SparseTable::contains(const int id) const {
+ const int readingPos = getPosInIndexTable(id);
+ if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
+ return false;
+ }
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos);
+ return index != NOT_EXIST;
+}
+
+uint32_t SparseTable::get(const int id) const {
+ const int indexTableReadingPos = getPosInIndexTable(id);
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos);
+ const int contentTableReadingPos = getPosInContentTable(id, index);
+ return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos);
+}
+
+bool SparseTable::set(const int id, const uint32_t value) {
+ const int posInIndexTable = getPosInIndexTable(id);
+ // Extends the index table if needed.
+ if (mIndexTableBuffer->getTailPosition() < posInIndexTable) {
+ int tailPos = mIndexTableBuffer->getTailPosition();
+ while(tailPos < posInIndexTable) {
+ if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) {
+ AKLOGE("cannot extend index table. tailPos: %d to: %d", tailPos, posInIndexTable);
+ return false;
+ }
+ }
+ }
+ if (contains(id)) {
+ // The entry is already in the content table.
+ const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable);
+ if (!mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index))) {
+ AKLOGE("cannot update value %d. pos: %d, tailPos: %d, mDataSize: %d", value,
+ getPosInContentTable(id, index), mContentTableBuffer->getTailPosition(),
+ mDataSize);
+ return false;
+ }
+ return true;
+ }
+ // The entry is not in the content table.
+ // Create new entry in the content table.
+ const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
+ if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) {
+ AKLOGE("cannot write index %d. pos %d", index, posInIndexTable);
+ return false;
+ }
+ // Write a new block that containing the entry to be set.
+ int writingPos = getPosInContentTable(0 /* id */, index);
+ for (int i = 0; i < mBlockSize; ++i) {
+ if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize,
+ &writingPos)) {
+ AKLOGE("cannot write content table to extend. writingPos: %d, tailPos: %d, "
+ "mDataSize: %d", writingPos, mContentTableBuffer->getTailPosition(), mDataSize);
+ return false;
+ }
+ }
+ return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
+}
+
+int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
+ return contentTablePos / mDataSize / mBlockSize;
+}
+
+int SparseTable::getPosInIndexTable(const int id) const {
+ return (id / mBlockSize) * INDEX_SIZE;
+}
+
+int SparseTable::getPosInContentTable(const int id, const int index) const {
+ const int offset = id % mBlockSize;
+ return (index * mBlockSize + offset) * mDataSize;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
new file mode 100644
index 000000000..21c167506
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_H
+#define LATINIME_SPARSE_TABLE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Note that there is a corresponding implementation in SparseTable.java.
+// TODO: Support multiple content buffers.
+class SparseTable {
+ public:
+ SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
+ BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
+ const int dataSize)
+ : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
+ mBlockSize(blockSize), mDataSize(dataSize) {}
+
+ bool contains(const int id) const;
+
+ uint32_t get(const int id) const;
+
+ bool set(const int id, const uint32_t value);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+
+ int getIndexFromContentTablePos(const int contentTablePos) const;
+
+ int getPosInIndexTable(const int id) const;
+
+ int getPosInContentTable(const int id, const int index) const;
+
+ static const int NOT_EXIST;
+ static const int INDEX_SIZE;
+
+ BufferWithExtendableBuffer *const mIndexTableBuffer;
+ BufferWithExtendableBuffer *const mContentTableBuffer;
+ const int mBlockSize;
+ const int mDataSize;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index 007c19e0a..fd0ac9eb6 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int point0Index = dicNode->getInputIndex(0);
- return dicNode->isTerminalWordNode()
+ return dicNode->isTerminalDicNode()
&& traverseSession->getProximityInfoState(0)->
hasSpaceProximity(point0Index);
}
@@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
if (dicNode->isCompletion(inputSize)) {
return false;
}
- if (!dicNode->isTerminalWordNode()) {
+ if (!dicNode->isTerminalDicNode()) {
return false;
}
const int16_t pointIndex = dicNode->getInputIndex(0);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 5b6b5e874..54f65c786 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -23,39 +23,64 @@ namespace latinime {
const TypingWeighting TypingWeighting::sInstance;
-ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
+ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
switch (correctionType) {
case CT_MATCH:
if (isProximityDicNode(traverseSession, dicNode)) {
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
+ } else if (dicNode->isInDigraph()) {
+ return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
} else {
- return ET_NOT_AN_ERROR;
+ // Compare the node code point with original primary code point on the keyboard.
+ const ProximityInfoState *const pInfoState =
+ traverseSession->getProximityInfoState(0);
+ const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+ dicNode->getInputIndex(0));
+ const int nodeCodePoint = dicNode->getNodeCodePoint();
+ if (primaryOriginalCodePoint == nodeCodePoint) {
+ // Node code point is same as original code point on the keyboard.
+ return ErrorTypeUtils::NOT_AN_ERROR;
+ } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+ CharUtils::toLowerCase(nodeCodePoint)) {
+ // Only cases of the code points are different.
+ return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
+ CharUtils::toBaseCodePoint(nodeCodePoint)) {
+ // Node code point is a variant of original code point.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
+ } else {
+ // Node code point is a variant of original code point and the cases are also
+ // different.
+ return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
+ | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+ }
}
+ break;
case CT_ADDITIONAL_PROXIMITY:
- return ET_PROXIMITY_CORRECTION;
+ return ErrorTypeUtils::PROXIMITY_CORRECTION;
case CT_OMISSION:
if (parentDicNode->canBeIntentionalOmission()) {
- return ET_INTENTIONAL_OMISSION;
+ return ErrorTypeUtils::INTENTIONAL_OMISSION;
} else {
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
}
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
- return ET_EDIT_CORRECTION;
+ return ErrorTypeUtils::EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
- return ET_NEW_WORD;
+ return ErrorTypeUtils::NEW_WORD;
case CT_TERMINAL:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
case CT_COMPLETION:
- return ET_COMPLETION;
+ return ErrorTypeUtils::COMPLETION;
default:
- return ET_NOT_AN_ERROR;
+ return ErrorTypeUtils::NOT_AN_ERROR;
}
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 9f0a331e3..41314ef52 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -204,7 +205,7 @@ class TypingWeighting : public Weighting {
return cost * traverseSession->getMultiWordCostMultiplier();
}
- ErrorType getErrorType(const CorrectionType correctionType,
+ ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const;
diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h
new file mode 100644
index 000000000..081802e8b
--- /dev/null
+++ b/native/jni/src/utils/exclusive_ownership_pointer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
+#define LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+template<class T>
+class ExclusiveOwnershipPointer {
+ public:
+ // This instance become an owner of the raw pointer.
+ AK_FORCE_INLINE ExclusiveOwnershipPointer(T *const rawPointer)
+ : mPointer(rawPointer),
+ mSharedOwnerPtr(new (ExclusiveOwnershipPointer<T> *)(this)) {}
+
+ // Move the ownership.
+ AK_FORCE_INLINE ExclusiveOwnershipPointer(const ExclusiveOwnershipPointer<T> &pointer)
+ : mPointer(pointer.mPointer), mSharedOwnerPtr(pointer.mSharedOwnerPtr) {
+ transferOwnership(&pointer);
+ }
+
+ AK_FORCE_INLINE ~ExclusiveOwnershipPointer() {
+ deletePointersIfHavingOwnership();
+ }
+
+ AK_FORCE_INLINE T *get() const {
+ return mPointer;
+ }
+
+ private:
+ // This class allows to copy and ensures only one instance has the ownership of the
+ // managed pointer.
+ DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer);
+ DISALLOW_ASSIGNMENT_OPERATOR(ExclusiveOwnershipPointer);
+
+ void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
+ if (*mSharedOwnerPtr != src) {
+ AKLOGE("Failed to transfer the ownership because src is not the current owner."
+ "src: %p, owner: %p", src, *mSharedOwnerPtr);
+ ASSERT(false);
+ return;
+ }
+ // Transfer the ownership from src to this instance.
+ *mSharedOwnerPtr = this;
+ }
+
+ void deletePointersIfHavingOwnership() {
+ if (mSharedOwnerPtr && *mSharedOwnerPtr == this) {
+ if (mPointer) {
+ if (DEBUG_DICT) {
+ AKLOGI("Releasing pointer: %p", mPointer);
+ }
+ delete mPointer;
+ }
+ delete mSharedOwnerPtr;
+ }
+ }
+
+ T *mPointer;
+ // mSharedOwnerPtr points a shared memory space where the instance which has the ownership is
+ // stored.
+ ExclusiveOwnershipPointer<T> **mSharedOwnerPtr;
+};
+} // namespace latinime
+#endif /* LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H */
diff --git a/native/jni/src/utils/time_keeper.cpp b/native/jni/src/utils/time_keeper.cpp
new file mode 100644
index 000000000..026284060
--- /dev/null
+++ b/native/jni/src/utils/time_keeper.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/time_keeper.h"
+
+#include <ctime>
+
+namespace latinime {
+
+int TimeKeeper::sCurrentTime;
+bool TimeKeeper::sSetForTesting;
+
+/* static */ void TimeKeeper::setCurrentTime() {
+ if (!sSetForTesting) {
+ sCurrentTime = time(0);
+ }
+}
+
+/* static */ void TimeKeeper::startTestModeWithForceCurrentTime(const int currentTime) {
+ sCurrentTime = currentTime;
+ sSetForTesting = true;
+}
+
+/* static */ void TimeKeeper::stopTestMode() {
+ sSetForTesting = false;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/utils/time_keeper.h b/native/jni/src/utils/time_keeper.h
new file mode 100644
index 000000000..d066757e4
--- /dev/null
+++ b/native/jni/src/utils/time_keeper.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_TIME_KEEPER_H
+#define LATINIME_TIME_KEEPER_H
+
+#include "defines.h"
+
+namespace latinime {
+
+class TimeKeeper {
+ public:
+ static void setCurrentTime();
+
+ static void startTestModeWithForceCurrentTime(const int currentTime);
+
+ static void stopTestMode();
+
+ static int peekCurrentTime() { return sCurrentTime; };
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TimeKeeper);
+
+ static int sCurrentTime;
+ static bool sSetForTesting;
+};
+} // namespace latinime
+#endif /* LATINIME_TIME_KEEPER_H */