about summary refs log tree commit diff stats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/Android.mk47
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp146
-rw-r--r--native/jni/src/defines.h36
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp42
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_vector.h12
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h79
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h16
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h41
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp7
-rw-r--r--native/jni/src/suggest/core/dictionary/bloom_filter.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp73
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h43
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.cpp12
-rw-r--r--native/jni/src/suggest/core/dictionary/digraph_utils.h2
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.cpp34
-rw-r--r--native/jni/src/suggest/core/dictionary/error_type_utils.h69
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp71
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h67
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.cpp52
-rw-r--r--native/jni/src/suggest/core/dictionary/unigram_property.h87
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.cpp16
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.h11
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.cpp6
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h22
-rw-r--r--native/jni/src/suggest/core/policy/weighting.cpp5
-rw-r--r--native/jni/src/suggest/core/policy/weighting.h3
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp8
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h27
-rw-r--r--native/jni/src/suggest/core/suggest.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h35
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp391
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h92
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp240
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp53
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h)15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp191
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h)88
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp124
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h163
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp380
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h121
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp)169
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h)190
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp)26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h)22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp558
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h138
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp)61
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h)25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h81
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp)29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h)36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h)1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h110
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp132
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp144
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp294
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h229
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp202
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h102
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h99
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp160
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h63
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp175
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h90
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h75
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp39
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h111
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp100
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h61
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp138
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h137
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp71
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h73
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp411
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp352
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h140
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp285
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp85
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp157
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp167
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h57
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp98
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h60
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp96
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h61
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_traversal.h4
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp47
-rw-r--r--native/jni/src/suggest/policyimpl/typing/typing_weighting.h3
-rw-r--r--native/jni/src/utils/exclusive_ownership_pointer.h81
-rw-r--r--native/jni/src/utils/time_keeper.cpp41
-rw-r--r--native/jni/src/utils/time_keeper.h41
122 files changed, 3406 insertions, 7183 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index e11e706f3..ca6a77997 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -57,9 +57,7 @@ LATIN_IME_CORE_SRC_FILES := \
bloom_filter.cpp \
dictionary.cpp \
digraph_utils.cpp \
- error_type_utils.cpp \
- multi_bigram_map.cpp \
- unigram_property.cpp) \
+ multi_bigram_map.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \
proximity_info.cpp \
@@ -69,45 +67,27 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
+ bigram/bigram_list_read_write_utils.cpp \
+ bigram/dynamic_bigram_list_policy.cpp \
header/header_policy.cpp \
header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
- structure/dictionary_structure_with_buffer_policy_factory.cpp) \
- $(addprefix suggest/policyimpl/dictionary/bigram/, \
- bigram_list_read_write_utils.cpp \
- ver4_bigram_list_policy.cpp) \
- $(addprefix suggest/policyimpl/dictionary/structure/pt_common/, \
- dynamic_pt_gc_event_listeners.cpp \
- dynamic_pt_reading_helper.cpp \
- dynamic_pt_reading_utils.cpp \
- dynamic_pt_updating_helper.cpp \
- dynamic_pt_writing_utils.cpp) \
- $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
+ dictionary_structure_with_buffer_policy_factory.cpp \
+ dynamic_patricia_trie_gc_event_listeners.cpp \
+ dynamic_patricia_trie_node_reader.cpp \
+ dynamic_patricia_trie_policy.cpp \
+ dynamic_patricia_trie_reading_helper.cpp \
+ dynamic_patricia_trie_reading_utils.cpp \
+ dynamic_patricia_trie_writing_helper.cpp \
+ dynamic_patricia_trie_writing_utils.cpp \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
- $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
- ver4_dict_buffers.cpp \
- ver4_dict_constants.cpp \
- ver4_patricia_trie_node_reader.cpp \
- ver4_patricia_trie_node_writer.cpp \
- ver4_patricia_trie_policy.cpp \
- ver4_patricia_trie_reading_utils.cpp \
- ver4_patricia_trie_writing_helper.cpp) \
- $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
- bigram_dict_content.cpp \
- probability_dict_content.cpp \
- shortcut_dict_content.cpp \
- sparse_table_dict_content.cpp \
- terminal_position_lookup_table.cpp) \
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \
dict_file_writing_utils.cpp \
- file_utils.cpp \
forgetting_curve_utils.cpp \
- format_utils.cpp \
- mmapped_buffer.cpp \
- sparse_table.cpp) \
+ format_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \
@@ -118,8 +98,7 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix utils/, \
autocorrection_threshold_utils.cpp \
char_utils.cpp \
- log_utils.cpp \
- time_keeper.cpp)
+ log_utils.cpp)
LOCAL_SRC_FILES := \
$(LATIN_IME_JNI_SRC_FILES) \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 716bda5a7..8f21c50ec 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -24,9 +24,8 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
-#include "suggest/core/dictionary/unigram_property.h"
#include "suggest/core/suggest_options.h"
-#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "utils/autocorrection_threshold_utils.h"
@@ -87,11 +86,11 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
- DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE);
- if (!dictionaryStructureWithBufferPolicy.get()) {
+ if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
@@ -136,12 +135,6 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic
delete dictionary;
}
-static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
- Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) return 0;
- return dictionary->getFormatVersionNumber();
-}
-
static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
@@ -259,21 +252,6 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
word1Length);
}
-static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz,
- jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) {
- Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) return;
- const jsize wordLength = env->GetArrayLength(word);
- int wordCodePoints[wordLength];
- env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
- const UnigramProperty unigramProperty = dictionary->getUnigramProperty(
- wordCodePoints, wordLength);
- unigramProperty.outputProperties(env, outCodePoints, outFlags, outProbability,
- outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
-}
-
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before);
@@ -299,8 +277,7 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji
}
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability,
- jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
+ jintArray word, jint probability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -308,17 +285,11 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
- jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
- int shortcutTargetCodePoints[shortcutLength];
- if (shortcutTarget) {
- env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
- }
- dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
- shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
+ dictionary->addUnigramWord(codePoints, wordLength, probability);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
- jintArray word0, jintArray word1, jint probability, jint timestamp) {
+ jintArray word0, jintArray word1, jint probability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@@ -330,7 +301,7 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
- word1Length, probability, timestamp);
+ word1Length, probability);
}
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@@ -349,87 +320,6 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length);
}
-// Returns how many language model params are processed.
-static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
- jlong dict, jobjectArray languageModelParams, jint startIndex) {
- Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
- if (!dictionary) {
- return 0;
- }
- jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
- if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) {
- return 0;
- }
- jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
- jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
- env->DeleteLocalRef(languageModelParam);
-
- jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
- jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
- jfieldID unigramProbabilityFieldId =
- env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
- jfieldID bigramProbabilityFieldId =
- env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
- jfieldID timestampFieldId =
- env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
- jfieldID shortcutTargetFieldId =
- env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
- jfieldID shortcutProbabilityFieldId =
- env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
- jfieldID isNotAWordFieldId =
- env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
- jfieldID isBlacklistedFieldId =
- env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
- env->DeleteLocalRef(languageModelParamClass);
-
- for (int i = startIndex; i < languageModelParamCount; ++i) {
- jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
- // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the
- // other hand, word0 can be null and then it means the set of params doesn't contain bigram
- // information.
- jintArray word0 = static_cast<jintArray>(
- env->GetObjectField(languageModelParam, word0FieldId));
- jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
- int word0CodePoints[word0Length];
- if (word0) {
- env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
- }
- jintArray word1 = static_cast<jintArray>(
- env->GetObjectField(languageModelParam, word1FieldId));
- jsize word1Length = env->GetArrayLength(word1);
- int word1CodePoints[word1Length];
- env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
- jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
- jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
- jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
- jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
- jintArray shortcutTarget = static_cast<jintArray>(
- env->GetObjectField(languageModelParam, shortcutTargetFieldId));
- jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
- int shortcutTargetCodePoints[shortcutLength];
- if (shortcutTarget) {
- env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
- }
- jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
- dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
- shortcutTargetCodePoints, shortcutLength, shortcutProbability,
- isNotAWord, isBlacklisted, timestamp);
- if (word0) {
- jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
- dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
- bigramProbability, timestamp);
- }
- if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
- return i + 1;
- }
- env->DeleteLocalRef(word0);
- env->DeleteLocalRef(word1);
- env->DeleteLocalRef(shortcutTarget);
- env->DeleteLocalRef(languageModelParam);
- }
- return languageModelParamCount;
-}
-
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
jlong dict, jint unigramProbability, jint bigramProbability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@@ -453,7 +343,7 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz,
static const int GET_PROPERTY_RESULT_LENGTH = 100;
char resultChars[GET_PROPERTY_RESULT_LENGTH];
resultChars[0] = '\0';
- dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
+ dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH);
return env->NewStringUTF(resultChars);
}
@@ -474,11 +364,6 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
- const_cast<char *>("getFormatVersionNative"),
- const_cast<char *>("(J)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
- },
- {
const_cast<char *>("flushNative"),
const_cast<char *>("(JLjava/lang/String;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
@@ -509,11 +394,6 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
- const_cast<char *>("getUnigramPropertyNative"),
- const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty)
- },
- {
const_cast<char *>("calcNormalizedScoreNative"),
const_cast<char *>("([I[II)F"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)
@@ -525,12 +405,12 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("addUnigramWordNative"),
- const_cast<char *>("(J[II[IIZZI)V"),
+ const_cast<char *>("(J[II)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
},
{
const_cast<char *>("addBigramWordsNative"),
- const_cast<char *>("(J[I[III)V"),
+ const_cast<char *>("(J[I[II)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
},
{
@@ -539,12 +419,6 @@ static const JNINativeMethod sMethods[] = {
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
},
{
- const_cast<char *>("addMultipleDictionaryEntriesNative"),
- const_cast<char *>(
- "(J[Lcom/android/inputmethod/latin/BinaryDictionary$LanguageModelParam;I)I"),
- reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
- },
- {
const_cast<char *>("calculateProbabilityNative"),
const_cast<char *>("(JII)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 9a26fe051..742e388e4 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -298,7 +298,6 @@ static inline void prof_out(void) {
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)
-#define NOT_A_TIMESTAMP (-1)
// A special value to mean the first word confidence makes no sense in this case,
// e.g. this is not a multi-word suggestion.
@@ -342,21 +341,12 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
-#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
- TypeName()
-
-#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
- TypeName(const TypeName&)
-
-#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
- void operator=(const TypeName&)
-
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- DISALLOW_COPY_CONSTRUCTOR(TypeName); \
- DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
- DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
+ TypeName(); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// Used as a return value for character comparison
@@ -402,4 +392,24 @@ typedef enum {
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
+
+// ErrorType is mainly decided by CorrectionType, but it also depends on whether
+// the correction has really been performed or not.
+typedef enum {
+ // Substitution, omission and transposition
+ ET_EDIT_CORRECTION,
+ // Proximity error
+ ET_PROXIMITY_CORRECTION,
+ // Completion
+ ET_COMPLETION,
+ // New word
+ // TODO: Remove.
+ // A new word error should be an edit correction error or a proximity correction error.
+ ET_NEW_WORD,
+ // Treat error as an intentional omission when the CorrectionType is omission and the node can
+ // be an intentional omission.
+ ET_INTENTIONAL_OMISSION,
+ // Not treated as an error. Tracked for checking exact match
+ ET_NOT_AN_ERROR
+} ErrorType;
#endif // LATINIME_DEFINES_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 0b2b4a9e8..49cfdecac 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -99,7 +99,7 @@ class DicNode {
virtual ~DicNode() {}
// Init for copy
- void initByCopy(const DicNode *const dicNode) {
+ void initByCopy(const DicNode *dicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
@@ -107,25 +107,25 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- // Init for root with prevWordPtNodePos which is used for bigram
- void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
+ // Init for root with prevWordNodePos which is used for bigram
+ void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
- mDicNodeState.init(prevWordPtNodePos);
+ mDicNodeState.init(prevWordNodePos);
PROF_NODE_RESET(mProfiler);
}
// Init for root with previous word
- void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
+ void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@@ -138,7 +138,7 @@ class DicNode {
mDicNodeState.mDicNodeStatePrevWord.init(
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
dicNode->mDicNodeProperties.getProbability(),
- dicNode->mDicNodeProperties.getPtNodePos(),
+ dicNode->mDicNodeProperties.getPos(),
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
@@ -148,27 +148,26 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
- void initAsPassingChild(DicNode *parentDicNode) {
+ void initAsPassingChild(DicNode *parentNode) {
mIsUsed = true;
- mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
- const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
- mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
- mDicNodeState.init(&parentDicNode->mDicNodeState);
- PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
+ mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
+ const int c = parentNode->getNodeTypedCodePoint();
+ mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
+ mDicNodeState.init(&parentNode->mDicNodeState);
+ PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
}
- void initAsChild(const DicNode *const dicNode, const int ptNodePos,
- const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
- const bool hasChildren, const bool isBlacklistedOrNotAWord,
- const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
+ void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
+ const int probability, const bool isTerminal, const bool hasChildren,
+ const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
- mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
- probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
- newLeavingDepth);
+ mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
+ isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -235,7 +234,7 @@ class DicNode {
}
bool isFirstWord() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@@ -247,30 +246,29 @@ class DicNode {
}
// Used to get bigram probability in DicNodeUtils
- int getPtNodePos() const {
- return mDicNodeProperties.getPtNodePos();
+ int getPos() const {
+ return mDicNodeProperties.getPos();
}
// Used to get bigram probability in DicNodeUtils
- int getPrevWordTerminalPtNodePos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
+ int getPrevWordPos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
}
// Used in DicNodeUtils
- int getChildrenPtNodeArrayPos() const {
- return mDicNodeProperties.getChildrenPtNodeArrayPos();
+ int getChildrenPos() const {
+ return mDicNodeProperties.getChildrenPos();
}
int getProbability() const {
return mDicNodeProperties.getProbability();
}
- AK_FORCE_INLINE bool isTerminalDicNode() const {
- const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
- const int currentDicNodeDepth = getNodeCodePointCount();
- const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
- return isTerminalPtNode && currentDicNodeDepth > 0
- && currentDicNodeDepth == terminalDicNodeDepth;
+ AK_FORCE_INLINE bool isTerminalWordNode() const {
+ const bool isTerminalNodes = mDicNodeProperties.isTerminal();
+ const int currentNodeDepth = getNodeCodePointCount();
+ const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
+ return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
}
bool shouldBeFilteredBySafetyNetForBigram() const {
@@ -376,8 +374,8 @@ class DicNode {
}
// Used to commit input partially
- int getPrevWordPtNodePos() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
+ int getPrevWordNodePos() const {
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
@@ -412,7 +410,7 @@ class DicNode {
// TODO: Remove once touch path is merged into ProximityInfoState
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
int getNodeCodePoint() const {
- const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
+ const int codePoint = mDicNodeProperties.getNodeCodePoint();
const DigraphUtils::DigraphCodePointIndex digraphIndex =
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@@ -425,8 +423,8 @@ class DicNode {
// Utils for cost calculation //
////////////////////////////////
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
- return mDicNodeProperties.getDicNodeCodePoint()
- == dicNode->mDicNodeProperties.getDicNodeCodePoint();
+ return mDicNodeProperties.getNodeCodePoint()
+ == dicNode->mDicNodeProperties.getNodeCodePoint();
}
// TODO: remove
@@ -576,8 +574,7 @@ class DicNode {
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
- const bool doNormalization, const int inputSize,
- const ErrorTypeUtils::ErrorType errorType) {
+ const bool doNormalization, const int inputSize, const ErrorType errorType) {
if (DEBUG_GEO_FULL) {
LOGI_SHOW_ADD_COST_PROP;
}
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index 5540b6df5..ec65114c7 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -22,6 +22,7 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "utils/char_utils.h"
namespace latinime {
@@ -31,20 +32,19 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordPtNodePos, DicNode *const newRootDicNode) {
- newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
+ const int prevWordNodePos, DicNode *const newRootNode) {
+ newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
- newRootDicNode->initAsRootWithPreviousWord(
- prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
+ DicNode *const prevWordLastNode, DicNode *const newRootNode) {
+ newRootNode->initAsRootWithPreviousWord(
+ prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
}
-/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
- DicNode *const destDicNode) {
- destDicNode->initByCopy(srcDicNode);
+/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
+ destNode->initByCopy(srcNode);
}
///////////////////////////////////
@@ -52,14 +52,14 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- DicNodeVector *const childDicNodes) {
+ DicNodeVector *childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
if (!dicNode->isLeavingNode()) {
childDicNodes->pushPassingChild(dicNode);
} else {
- dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
+ dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
}
}
@@ -71,11 +71,11 @@ namespace latinime {
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
- if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
+ const DicNode *const node, MultiBigramMap *multiBigramMap) {
+ if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
- const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
+ const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@@ -85,19 +85,19 @@ namespace latinime {
/* static */ int DicNodeUtils::getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
- const int unigramProbability = dicNode->getProbability();
- const int ptNodePos = dicNode->getPtNodePos();
- const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
- if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
+ const DicNode *const node, MultiBigramMap *multiBigramMap) {
+ const int unigramProbability = node->getProbability();
+ const int wordPos = node->getPos();
+ const int prevWordPos = node->getPrevWordPos();
+ if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
- return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
- prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
+ return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
+ wordPos, unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
@@ -109,7 +109,7 @@ namespace latinime {
// TODO: Move to char_utils?
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, int *const dest) {
+ const int *const src1, const int16_t length1, int *dest) {
int actualLength0 = 0;
for (int i = 0; i < length0; ++i) {
if (src0[i] == 0) {
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
index 3f1514a52..3fb351a61 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h
@@ -31,20 +31,20 @@ class MultiBigramMap;
class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
- const int16_t length1, int *const dest);
+ const int16_t length1, int *dest);
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const int prevWordPtNodePos, DicNode *const newRootDicNode);
+ const int prevWordNodePos, DicNode *newRootNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
- static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
+ DicNode *prevWordLastNode, DicNode *newRootNode);
+ static void initByCopy(DicNode *srcNode, DicNode *destNode);
static void getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
+ const DicNode *const node, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@@ -53,7 +53,7 @@ class DicNodeUtils {
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
- const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
+ const DicNode *const node, MultiBigramMap *multiBigramMap);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
index 9364e7751..42addae8d 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h
@@ -62,14 +62,14 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
- void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
- const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
- const bool hasChildren, const bool isBlacklistedOrNotAWord,
- const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
+ void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
+ const int probability, const bool isTerminal, const bool hasChildren,
+ const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
+ const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
- mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
- isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+ mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
+ hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
index c41a7243a..9e0f62ceb 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h
@@ -24,14 +24,15 @@
namespace latinime {
/**
- * PtNode information related to the DicNode from the lexicon trie.
+ * Node for traversing the lexicon trie.
*/
+// TODO: Introduce a dictionary node class which has attribute members required to understand the
+// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
- : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
- mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
- mDepth(0), mLeavingDepth(0) {}
+ : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
+ mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
@@ -39,57 +40,57 @@ class DicNodeProperties {
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) {
- mPtNodePos = pos;
- mChildrenPtNodeArrayPos = childrenPos;
- mDicNodeCodePoint = nodeCodePoint;
+ mPos = pos;
+ mChildrenPos = childrenPos;
+ mNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
- mHasChildrenPtNodes = hasChildren;
+ mHasChildren = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
// Init for copy
- void init(const DicNodeProperties *const dicNodeProp) {
- mPtNodePos = dicNodeProp->mPtNodePos;
- mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
- mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
- mProbability = dicNodeProp->mProbability;
- mIsTerminal = dicNodeProp->mIsTerminal;
- mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
- mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
- mDepth = dicNodeProp->mDepth;
- mLeavingDepth = dicNodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const nodeProp) {
+ mPos = nodeProp->mPos;
+ mChildrenPos = nodeProp->mChildrenPos;
+ mNodeCodePoint = nodeProp->mNodeCodePoint;
+ mProbability = nodeProp->mProbability;
+ mIsTerminal = nodeProp->mIsTerminal;
+ mHasChildren = nodeProp->mHasChildren;
+ mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = nodeProp->mDepth;
+ mLeavingDepth = nodeProp->mLeavingDepth;
}
// Init as passing child
- void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
- mPtNodePos = dicNodeProp->mPtNodePos;
- mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
- mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
- mProbability = dicNodeProp->mProbability;
- mIsTerminal = dicNodeProp->mIsTerminal;
- mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
- mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
- mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
- mLeavingDepth = dicNodeProp->mLeavingDepth;
+ void init(const DicNodeProperties *const nodeProp, const int codePoint) {
+ mPos = nodeProp->mPos;
+ mChildrenPos = nodeProp->mChildrenPos;
+ mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+ mProbability = nodeProp->mProbability;
+ mIsTerminal = nodeProp->mIsTerminal;
+ mHasChildren = nodeProp->mHasChildren;
+ mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
+ mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
+ mLeavingDepth = nodeProp->mLeavingDepth;
}
- int getPtNodePos() const {
- return mPtNodePos;
+ int getPos() const {
+ return mPos;
}
- int getChildrenPtNodeArrayPos() const {
- return mChildrenPtNodeArrayPos;
+ int getChildrenPos() const {
+ return mChildrenPos;
}
int getProbability() const {
return mProbability;
}
- int getDicNodeCodePoint() const {
- return mDicNodeCodePoint;
+ int getNodeCodePoint() const {
+ return mNodeCodePoint;
}
uint16_t getDepth() const {
@@ -106,7 +107,7 @@ class DicNodeProperties {
}
bool hasChildren() const {
- return mHasChildrenPtNodes || mDepth != mLeavingDepth;
+ return mHasChildren || mDepth != mLeavingDepth;
}
bool isBlacklistedOrNotAWord() const {
@@ -117,12 +118,12 @@ class DicNodeProperties {
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
- int mPtNodePos;
- int mChildrenPtNodeArrayPos;
+ int mPos;
+ int mChildrenPos;
int mProbability;
- int mDicNodeCodePoint;
+ int mNodeCodePoint;
bool mIsTerminal;
- bool mHasChildrenPtNodes;
+ bool mHasChildren;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index dba57056b..b8986203d 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
+ mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
}
@@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordPtNodePos = NOT_A_DICT_POS;
+ mPrevWordNodePos = NOT_A_DICT_POS;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordPtNodePos = prevWordNodePos;
+ mPrevWordNodePos = prevWordNodePos;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = prevWord->mPrevWordCount;
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
- mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
+ mPrevWordNodePos = prevWord->mPrevWordNodePos;
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
}
@@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
- mPrevWordPtNodePos = prevWordNodePos;
+ mPrevWordNodePos = prevWordNodePos;
int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
if (twoWordsLen >= MAX_WORD_LENGTH) {
@@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
return mPrevWordStart;
}
- int getPrevWordPtNodePos() const {
- return mPrevWordPtNodePos;
+ int getPrevWordNodePos() const {
+ return mPrevWordNodePos;
}
int getPrevWordCodePointAt(const int id) const {
@@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordLength;
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
- int mPrevWordPtNodePos;
+ int mPrevWordNodePos;
int mSecondWordFirstInputIndex;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 74f9eee92..3c85d0e9d 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -21,7 +21,6 @@
#include "defines.h"
#include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -32,7 +31,7 @@ class DicNodeStateScoring {
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR),
+ mRawLength(0.0f), mExactMatch(true),
mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
@@ -48,7 +47,7 @@ class DicNodeStateScoring {
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
- mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR;
+ mExactMatch = true;
}
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -60,21 +59,34 @@ class DicNodeStateScoring {
mRawLength = scoring->mRawLength;
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
- mContainingErrorTypes = scoring->mContainingErrorTypes;
+ mExactMatch = scoring->mExactMatch;
mNormalizedCompoundDistanceAfterFirstWord =
scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
- const int inputSize, const int totalInputIndex,
- const ErrorTypeUtils::ErrorType errorType) {
+ const int inputSize, const int totalInputIndex, const ErrorType errorType) {
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
- mContainingErrorTypes = mContainingErrorTypes | errorType;
- if (ErrorTypeUtils::isEditCorrectionError(errorType)) {
- ++mEditCorrectionCount;
- }
- if (ErrorTypeUtils::isProximityCorrectionError(errorType)) {
- ++mProximityCorrectionCount;
+ switch (errorType) {
+ case ET_EDIT_CORRECTION:
+ ++mEditCorrectionCount;
+ mExactMatch = false;
+ break;
+ case ET_PROXIMITY_CORRECTION:
+ ++mProximityCorrectionCount;
+ mExactMatch = false;
+ break;
+ case ET_COMPLETION:
+ mExactMatch = false;
+ break;
+ case ET_NEW_WORD:
+ mExactMatch = false;
+ break;
+ case ET_INTENTIONAL_OMISSION:
+ mExactMatch = false;
+ break;
+ case ET_NOT_AN_ERROR:
+ break;
}
}
@@ -170,7 +182,7 @@ class DicNodeStateScoring {
}
bool isExactMatch() const {
- return ErrorTypeUtils::isExactMatch(mContainingErrorTypes);
+ return mExactMatch;
}
private:
@@ -187,8 +199,7 @@ class DicNodeStateScoring {
float mSpatialDistance;
float mLanguageDistance;
float mRawLength;
- // All accumulated error types so far
- ErrorTypeUtils::ErrorType mContainingErrorTypes;
+ bool mExactMatch;
float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 2a62b555b..71f4ef6ea 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
- int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
+ int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
@@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
- int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
+ int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
@@ -163,8 +163,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPos
- && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
+ if (bigramsIt.getBigramPos() == nextWordPos) {
return mDictionaryStructurePolicy->getProbability(
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
bigramsIt.getProbability());
diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h
index 5f9700486..5205456a8 100644
--- a/native/jni/src/suggest/core/dictionary/bloom_filter.h
+++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h
@@ -50,8 +50,6 @@ class BloomFilter {
}
private:
- DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
-
// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index e68c0a6d8..59ead1894 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -21,39 +21,46 @@
#include <stdint.h>
#include "defines.h"
+#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
#include "utils/log_utils.h"
-#include "utils/time_keeper.h"
namespace latinime {
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
-Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- &dictionaryStructureWithBufferPolicy)
+Dictionary::Dictionary(JNIEnv *env,
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
- mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())),
+ mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
logDictionaryInfo(env);
}
+Dictionary::~Dictionary() {
+ delete mBigramDictionary;
+ delete mGestureSuggest;
+ delete mTypingSuggest;
+ delete mDictionaryStructureWithBufferPolicy;
+}
+
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
int *spaceIndices, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
- TimeKeeper::setCurrentTime();
int result = 0;
if (suggestOptions->isGesture()) {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence);
if (DEBUG_DICT) {
@@ -63,7 +70,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
} else {
DicTraverseSession::initSessionInstance(
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
- result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates,
+ result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
outWords, frequencies, spaceIndices, outputTypes,
outputAutoCommitFirstWordConfidence);
@@ -76,15 +83,12 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
int *outputTypes) const {
- TimeKeeper::setCurrentTime();
if (length <= 0) return 0;
- return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies,
- outputTypes);
+ return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
}
int Dictionary::getProbability(const int *word, int length) const {
- TimeKeeper::setCurrentTime();
- int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
+ int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;
@@ -94,60 +98,39 @@ int Dictionary::getProbability(const int *word, int length) const {
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
int length1) const {
- TimeKeeper::setCurrentTime();
- return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1);
+ return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
}
-void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
- const int *const shortcutTargetCodePoints, const int shortcutLength,
- const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
- const int timestamp) {
- TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
- shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
- isBlacklisted, timestamp);
+void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
+ mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
}
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) {
- TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1,
- probability, timestamp);
+ const int length1, const int probability) {
+ mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
+ probability);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
- TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1);
+ mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
}
void Dictionary::flush(const char *const filePath) {
- TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy.get()->flush(filePath);
+ mDictionaryStructureWithBufferPolicy->flush(filePath);
}
void Dictionary::flushWithGC(const char *const filePath) {
- TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath);
+ mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
}
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
- TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC);
+ return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
}
-void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
+void Dictionary::getProperty(const char *const query, char *const outResult,
const int maxResultLength) {
- TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, queryLength, outResult,
- maxResultLength);
-}
-
-const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints,
- const int codePointCount) {
- TimeKeeper::setCurrentTime();
- return mDictionaryStructureWithBufferPolicy.get()->getUnigramProperty(
- codePoints, codePointCount);
+ return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index b37b4aa18..0195d5bf0 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -21,20 +21,15 @@
#include "defines.h"
#include "jni.h"
-#include "suggest/core/dictionary/bigram_dictionary.h"
-#include "suggest/core/dictionary/unigram_property.h"
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/core/suggest_interface.h"
-#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
+class BigramDictionary;
class DictionaryStructureWithBufferPolicy;
class DicTraverseSession;
class ProximityInfo;
+class SuggestInterface;
class SuggestOptions;
-class UnigramProperty;
class Dictionary {
public:
@@ -58,8 +53,8 @@ class Dictionary {
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
- Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- &dictionaryStructureWithBufferPolicy);
+ Dictionary(JNIEnv *env,
+ DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy);
int getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
@@ -74,13 +69,10 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
- void addUnigramWord(const int *const word, const int length, const int probability,
- const int *const shortcutTargetCodePoints, const int shortcutLength,
- const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
- const int timestamp);
+ void addUnigramWord(const int *const word, const int length, const int probability);
void addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp);
+ const int length1, const int probability);
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
@@ -91,33 +83,24 @@ class Dictionary {
bool needsToRunGC(const bool mindsBlockByGC);
- void getProperty(const char *const query, const int queryLength, char *const outResult,
+ void getProperty(const char *const query, char *const outResult,
const int maxResultLength);
- const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount);
-
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
- return mDictionaryStructureWithBufferPolicy.get();
+ return mDictionaryStructureWithBufferPolicy;
}
- int getFormatVersionNumber() const {
- return mDictionaryStructureWithBufferPolicy.get()->getHeaderStructurePolicy()
- ->getFormatVersionNumber();
- }
+ virtual ~Dictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
- typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr;
- typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr;
-
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
- const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- mDictionaryStructureWithBufferPolicy;
- const BigramDictionaryPtr mBigramDictionary;
- const SuggestInterfacePtr mGestureSuggest;
- const SuggestInterfacePtr mTypingSuggest;
+ DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
+ const BigramDictionary *const mBigramDictionary;
+ const SuggestInterface *const mGestureSuggest;
+ const SuggestInterface *const mTypingSuggest;
void logDictionaryInfo(JNIEnv *const env) const;
};
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
index 5f9b8f3e2..3271c1bfb 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp
@@ -28,8 +28,11 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
{ { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
{ 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
{ 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
+const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
+ { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
+ { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
- { DIGRAPH_TYPE_GERMAN_UMLAUT };
+ { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
const DictionaryHeaderStructurePolicy *const headerPolicy,
@@ -47,6 +50,9 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
if (headerPolicy->requiresGermanUmlautProcessing()) {
return DIGRAPH_TYPE_GERMAN_UMLAUT;
}
+ if (headerPolicy->requiresFrenchLigatureProcessing()) {
+ return DIGRAPH_TYPE_FRENCH_LIGATURES;
+ }
return DIGRAPH_TYPE_NONE;
}
@@ -80,6 +86,10 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
*digraphs = GERMAN_UMLAUT_DIGRAPHS;
return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
}
+ if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) {
+ *digraphs = FRENCH_LIGATURES_DIGRAPHS;
+ return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
+ }
return 0;
}
diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h
index bec2cd6e2..6ae16e390 100644
--- a/native/jni/src/suggest/core/dictionary/digraph_utils.h
+++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h
@@ -34,6 +34,7 @@ class DigraphUtils {
typedef enum {
DIGRAPH_TYPE_NONE,
DIGRAPH_TYPE_GERMAN_UMLAUT,
+ DIGRAPH_TYPE_FRENCH_LIGATURES
} DigraphType;
typedef struct { int first; int second; int compositeGlyph; } digraph_t;
@@ -54,6 +55,7 @@ class DigraphUtils {
const DigraphType digraphType, const int compositeGlyphCodePoint);
static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
+ static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
static const DigraphType USED_DIGRAPH_TYPES[];
};
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
deleted file mode 100644
index 0635fef7e..000000000
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/error_type_utils.h"
-
-namespace latinime {
-
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
-
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
- NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h
deleted file mode 100644
index ab4a65e48..000000000
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_ERROR_TYPE_UTILS_H
-#define LATINIME_ERROR_TYPE_UTILS_H
-
-#include <stdint.h>
-
-#include "defines.h"
-
-namespace latinime {
-
-class ErrorTypeUtils {
- public:
- // ErrorType is mainly decided by CorrectionType but it is also depending on if
- // the correction has really been performed or not.
- typedef uint32_t ErrorType;
-
- static const ErrorType NOT_AN_ERROR;
- static const ErrorType MATCH_WITH_CASE_ERROR;
- static const ErrorType MATCH_WITH_ACCENT_ERROR;
- static const ErrorType MATCH_WITH_DIGRAPH;
- // Treat error as an intentional omission when the CorrectionType is omission and the node can
- // be intentional omission.
- static const ErrorType INTENTIONAL_OMISSION;
- // Substitution, omission and transposition
- static const ErrorType EDIT_CORRECTION;
- // Proximity error
- static const ErrorType PROXIMITY_CORRECTION;
- // Completion
- static const ErrorType COMPLETION;
- // New word
- // TODO: Remove.
- // A new word error should be an edit correction error or a proximity correction error.
- static const ErrorType NEW_WORD;
-
- // TODO: Differentiate errors.
- static bool isExactMatch(const ErrorType containingErrors) {
- return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
- }
-
- static bool isEditCorrectionError(const ErrorType errorType) {
- return (errorType & EDIT_CORRECTION) != 0;
- }
-
- static bool isProximityCorrectionError(const ErrorType errorType) {
- return (errorType & PROXIMITY_CORRECTION) != 0;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
-
- static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
-};
-} // namespace latinime
-#endif // LATINIME_ERROR_TYPE_UTILS_H
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
index 49d82e69a..b1d2f4b4d 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
@@ -30,75 +30,4 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25;
// Most common previous word contexts currently have 100 bigrams
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
-// Look up the bigram probability for the given word pair from the cached bigram maps.
-// Also caches the bigrams if there is space remaining and they have not been cached already.
-int MultiBigramMap::getBigramProbability(
- const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability) {
- hash_map_compat<int, BigramMap>::const_iterator mapPosition =
- mBigramMaps.find(wordPosition);
- if (mapPosition != mBigramMaps.end()) {
- return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
- unigramProbability);
- }
- if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(structurePolicy, wordPosition);
- return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
- nextWordPosition, unigramProbability);
- }
- return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
- nextWordPosition, unigramProbability);
-}
-
-void MultiBigramMap::BigramMap::init(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
- mBloomFilter.setInFilter(bigramsIt.getBigramPos());
- }
-}
-
-int MultiBigramMap::BigramMap::getBigramProbability(
- const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nextWordPosition, const int unigramProbability) const {
- int bigramProbability = NOT_A_PROBABILITY;
- if (mBloomFilter.isInFilter(nextWordPosition)) {
- const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
- mBigramMap.find(nextWordPosition);
- if (bigramProbabilityIt != mBigramMap.end()) {
- bigramProbability = bigramProbabilityIt->second;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
-}
-
-void MultiBigramMap::addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
- mBigramMaps[position].init(structurePolicy, position);
-}
-
-int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability) {
- int bigramProbability = NOT_A_PROBABILITY;
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPosition) {
- bigramProbability = bigramsIt.getProbability();
- break;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index 421b2681c..4633c07b0 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -38,7 +38,21 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability);
+ const int wordPosition, const int nextWordPosition, const int unigramProbability) {
+ hash_map_compat<int, BigramMap>::const_iterator mapPosition =
+ mBigramMaps.find(wordPosition);
+ if (mapPosition != mBigramMaps.end()) {
+ return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
+ unigramProbability);
+ }
+ if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
+ addBigramsForWordPosition(structurePolicy, wordPosition);
+ return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
+ nextWordPosition, unigramProbability);
+ }
+ return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
+ nextWordPosition, unigramProbability);
+ }
void clear() {
mBigramMaps.clear();
@@ -53,11 +67,33 @@ class MultiBigramMap {
~BigramMap() {}
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nodePos);
+ const int nodePos) {
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
+ continue;
+ }
+ mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
+ mBloomFilter.setInFilter(bigramsIt.getBigramPos());
+ }
+ }
- int getBigramProbability(
+ AK_FORCE_INLINE int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nextWordPosition, const int unigramProbability) const;
+ const int nextWordPosition, const int unigramProbability) const {
+ int bigramProbability = NOT_A_PROBABILITY;
+ if (mBloomFilter.isInFilter(nextWordPosition)) {
+ const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
+ mBigramMap.find(nextWordPosition);
+ if (bigramProbabilityIt != mBigramMap.end()) {
+ bigramProbability = bigramProbabilityIt->second;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+ }
private:
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
@@ -67,12 +103,27 @@ class MultiBigramMap {
BloomFilter mBloomFilter;
};
- void addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
+ AK_FORCE_INLINE void addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
+ mBigramMaps[position].init(structurePolicy, position);
+ }
- int readBigramProbabilityFromBinaryDictionary(
+ AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability);
+ const int nextWordPosition, const int unigramProbability) {
+ int bigramProbability = NOT_A_PROBABILITY;
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPosition) {
+ bigramProbability = bigramsIt.getProbability();
+ break;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+ }
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
hash_map_compat<int, BigramMap> mBigramMaps;
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.cpp b/native/jni/src/suggest/core/dictionary/unigram_property.cpp
deleted file mode 100644
index 16bbb69d8..000000000
--- a/native/jni/src/suggest/core/dictionary/unigram_property.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/core/dictionary/unigram_property.h"
-
-namespace latinime {
-
-void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
- jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo,
- jobject outShortcutTargets, jobject outShortcutProbabilities) const {
- env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints);
- jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
- env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
- env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability);
- int historicalInfo[] = {mTimestamp, mLevel, mCount};
- env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo),
- historicalInfo);
-
- jclass integerClass = env->FindClass("java/lang/Integer");
- jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
- jclass arrayListClass = env->FindClass("java/util/ArrayList");
- jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
- const int shortcutTargetCount = mShortcutTargets.size();
- for (int i = 0; i < shortcutTargetCount; ++i) {
- jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size());
- env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
- mShortcutTargets[i].size(), &mShortcutTargets[i][0]);
- env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
- env->DeleteLocalRef(shortcutTargetCodePointArray);
- jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
- mShortcutProbabilities[i]);
- env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability);
- env->DeleteLocalRef(integerProbability);
- }
- env->DeleteLocalRef(integerClass);
- env->DeleteLocalRef(arrayListClass);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.h b/native/jni/src/suggest/core/dictionary/unigram_property.h
deleted file mode 100644
index c4ebb86ab..000000000
--- a/native/jni/src/suggest/core/dictionary/unigram_property.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_UNIGRAM_PROPERTY_H
-#define LATINIME_UNIGRAM_PROPERTY_H
-
-#include <cstring>
-#include <vector>
-
-#include "defines.h"
-#include "jni.h"
-
-namespace latinime {
-
-// This class is used for returning information belonging to a unigram to java side.
-class UnigramProperty {
- public:
- // Invalid unigram.
- UnigramProperty()
- : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false),
- mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
- mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {}
-
- UnigramProperty(const UnigramProperty &unigramProperty)
- : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount),
- mIsNotAWord(unigramProperty.mIsNotAWord),
- mIsBlacklisted(unigramProperty.mIsBlacklisted),
- mHasBigrams(unigramProperty.mHasBigrams),
- mHasShortcuts(unigramProperty.mHasShortcuts),
- mProbability(unigramProperty.mProbability),
- mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel),
- mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets),
- mShortcutProbabilities(unigramProperty.mShortcutProbabilities) {
- memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints));
- }
-
- UnigramProperty(const int *const codePoints, const int codePointCount,
- const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
- const bool hasShortcuts, const int probability, const int timestamp,
- const int level, const int count,
- const std::vector<std::vector<int> > *const shortcutTargets,
- const std::vector<int> *const shortcutProbabilities)
- : mCodePoints(), mCodePointCount(codePointCount),
- mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams),
- mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp),
- mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets),
- mShortcutProbabilities(*shortcutProbabilities) {
- memcpy(mCodePoints, codePoints, sizeof(mCodePoints));
- }
-
- void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
- jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets,
- jobject outShortcutProbabilities) const;
-
- private:
- DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
-
- int mCodePoints[MAX_WORD_LENGTH];
- int mCodePointCount;
- bool mIsNotAWord;
- bool mIsBlacklisted;
- bool mHasBigrams;
- bool mHasShortcuts;
- int mProbability;
- // Historical information
- int mTimestamp;
- int mLevel;
- int mCount;
- // Shortcut
- std::vector<std::vector<int> > mShortcutTargets;
- std::vector<int> mShortcutProbabilities;
-};
-} // namespace latinime
-#endif // LATINIME_UNIGRAM_PROPERTY_H
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index ee8e59ef9..e64476d82 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr,
&& sweetSpotCenterYs && sweetSpotRadii),
mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE
/* proximityCharsLength */]),
- mLowerCodePointToKeyMap() {
+ mCodeToKeyMap() {
/* Let's check the input array length here to make sure */
const jsize proximityCharsLength = env->GetArrayLength(proximityChars);
if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) {
@@ -147,14 +147,7 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_A_CODE_POINT;
}
- return mKeyIndexToLowerCodePointG[keyIndex];
-}
-
-int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const {
- if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
- return NOT_A_CODE_POINT;
- }
- return mKeyIndexToOriginalCodePoint[keyIndex];
+ return mKeyIndexToCodePointG[keyIndex];
}
void ProximityInfo::initializeG() {
@@ -171,9 +164,8 @@ void ProximityInfo::initializeG() {
const float gapY = sweetSpotCenterY - mCenterYsG[i];
mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale);
}
- mLowerCodePointToKeyMap[lowerCode] = i;
- mKeyIndexToOriginalCodePoint[i] = code;
- mKeyIndexToLowerCodePointG[i] = lowerCode;
+ mCodeToKeyMap[lowerCode] = i;
+ mKeyIndexToCodePointG[i] = lowerCode;
}
for (int i = 0; i < KEY_COUNT; i++) {
mKeyKeyDistancesG[i][i] = 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h
index a91b9d674..f25949001 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.h
+++ b/native/jni/src/suggest/core/layout/proximity_info.h
@@ -39,7 +39,6 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y, const bool isGeometric) const;
int getCodePointOf(const int keyIndex) const;
- int getOriginalCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@@ -77,11 +76,11 @@ class ProximityInfo {
ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates,
inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights,
mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH,
- KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes);
+ KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes);
}
AK_FORCE_INLINE int getKeyIndexOf(const int c) const {
- return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap);
+ return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap);
}
AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const {
@@ -118,9 +117,9 @@ class ProximityInfo {
// Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates.
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
- hash_map_compat<int, int> mLowerCodePointToKeyMap;
- int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD];
- int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
+ hash_map_compat<int, int> mCodeToKeyMap;
+
+ int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index bb4b41714..fbabd92f2 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -30,12 +30,6 @@
namespace latinime {
-int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const {
- const int primaryCodePoint = getPrimaryCodePointAt(index);
- const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint);
- return mProximityInfo->getOriginalCodePointOf(keyIndex);
-}
-
// TODO: Remove the dependency of "isGeometric"
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index e941e43d8..c94060fa9 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -65,8 +65,6 @@ class ProximityInfoState {
return getProximityCodePointsAt(index)[0];
}
- int getPrimaryOriginalCodePointAt(const int index) const;
-
inline bool sameAsTyped(const int *word, int length) const {
if (length != mSampledInputSize) {
return false;
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index b76b13971..5492c6070 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -29,10 +29,12 @@ class DictionaryHeaderStructurePolicy {
public:
virtual ~DictionaryHeaderStructurePolicy() {}
- virtual int getFormatVersionNumber() const = 0;
+ virtual bool supportsDynamicUpdate() const = 0;
virtual bool requiresGermanUmlautProcessing() const = 0;
+ virtual bool requiresFrenchLigatureProcessing() const = 0;
+
virtual float getMultiWordCostMultiplier() const = 0;
virtual int getLastDecayedTime() const = 0;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index c74a4ebbe..41f82049f 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -18,8 +18,6 @@
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_H
#include "defines.h"
-#include "suggest/core/dictionary/unigram_property.h"
-#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
@@ -30,25 +28,23 @@ class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
- * This class abstracts the structure of dictionaries.
+ * This class abstracts structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
class DictionaryStructureWithBufferPolicy {
public:
- typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
-
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0;
- virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
- virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
+ virtual int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getProbability(const int unigramProbability,
@@ -68,13 +64,11 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update was success or not.
virtual bool addUnigramWord(const int *const word, const int length,
- const int probability, const int *const shortcutTargetCodePoints,
- const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
- const bool isBlacklisted,const int timestamp) = 0;
+ const int probability) = 0;
// Returns whether the update was success or not.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) = 0;
+ const int length1, const int probability) = 0;
// Returns whether the update was success or not.
virtual bool removeBigramWords(const int *const word0, const int length0,
@@ -88,13 +82,9 @@ class DictionaryStructureWithBufferPolicy {
// Currently, this method is used only for testing. You may want to consider creating new
// dedicated method instead of this if you want to use this in the production.
- virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
+ virtual void getProperty(const char *const query, char *const outResult,
const int maxResultLength) = 0;
- // Used for testing.
- virtual const UnigramProperty getUnigramProperty(const int *const codePonts,
- const int codePointCount) const = 0;
-
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index c202b81fe..0c4016893 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -20,7 +20,6 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
namespace latinime {
@@ -83,8 +82,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
traverseSession, parentDicNode, dicNode, &inputStateG);
const float languageCost = Weighting::getLanguageCost(weighting, correctionType,
traverseSession, parentDicNode, dicNode, multiBigramMap);
- const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType,
- traverseSession, parentDicNode, dicNode);
+ const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession,
+ parentDicNode, dicNode);
profile(correctionType, dicNode);
if (inputStateG.mNeedsToUpdateInputStateG) {
dicNode->updateInputIndexG(&inputStateG);
diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h
index bd6b3cf41..2d49e98a6 100644
--- a/native/jni/src/suggest/core/policy/weighting.h
+++ b/native/jni/src/suggest/core/policy/weighting.h
@@ -18,7 +18,6 @@
#define LATINIME_WEIGHTING_H
#include "defines.h"
-#include "suggest/core/dictionary/error_type_utils.h"
namespace latinime {
@@ -85,7 +84,7 @@ class Weighting {
virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode) const = 0;
- virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
+ virtual ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 5070491f4..50f2bbd8d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
- mPrevWordPtNodePos = NOT_A_DICT_POS;
+ mPrevWordPos = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
- mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
+ mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
+ if (mPrevWordPos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
- mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
+ mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 6e4dda44d..e0b1c67d9 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -59,7 +59,7 @@ class DicTraverseSession {
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
- : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
+ : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
@@ -86,9 +86,11 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
- int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
+ int getPrevWordPos() const { return mPrevWordPos; }
// TODO: REMOVE
- void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
+ void setPrevWordPos(int pos) { mPrevWordPos = pos; }
+ // TODO: Use proper parameter when changed
+ int getDicRootPos() const { return 0; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@@ -117,13 +119,26 @@ class DicTraverseSession {
return true;
}
- ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
+ // Gathers search keys from every proximity info state that is in use.
+ // For each used state i, looks up the search key vector at the node's input index for i and
+ // appends its contents to outputSearchKeyVector. The output vector is only appended to; it is
+ // not cleared here.
+ void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
+ for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
+ if (!mProximityInfoStates[i].isUsed()) {
+ continue;
+ }
+ // Despite its name, this holds the node's input index for pointer i.
+ const int pointerId = node->getInputIndex(i);
+ // NOTE(review): the returned pointer is dereferenced below without a null check; confirm
+ // that getSearchKeyVector() never returns null.
+ const std::vector<int> *const searchKeyVector =
+ mProximityInfoStates[i].getSearchKeyVector(pointerId);
+ outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
+ searchKeyVector->end());
+ }
+ }
+
+ ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
ProximityType proximityType = UNRELATED_CHAR;
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
- const int pointerId = dicNode->getInputIndex(i);
+ const int pointerId = node->getInputIndex(i);
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
// TODO: Make this more generic
@@ -177,7 +192,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
- int mPrevWordPtNodePos;
+ int mPrevWordPos;
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index f84c84181..73ccebc88 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Continue suggestion after partial commit.
DicNode *topDicNode =
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
- traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
+ traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
traverseSession->getDicTraverseCache()->continueSearch();
traverseSession->setPartiallyCommited();
}
@@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
- traverseSession->getPrevWordPtNodePos(), &rootNode);
+ traverseSession->getPrevWordPos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}
@@ -231,15 +231,12 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
- ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
+ ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- const int shortcutBaseScore = SCORING->doesAutoCorrectValidWord() ?
- SCORING->calculateFinalScore(compoundDistance, traverseSession->getInputSize(),
- true /* forceCommit */) : finalScore;
const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
- outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes,
+ outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes,
sameAsTyped);
const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
traverseSession->getProximityInfoState(0));
@@ -424,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
break;
case UNRELATED_CHAR:
- // Just drop this dicNode and do nothing.
+ // Just drop this node and do nothing.
break;
default:
- // Just drop this dicNode and do nothing.
+ // Just drop this node and do nothing.
break;
}
}
- // Push the dicNode for look-ahead correction
+ // Push the node for look-ahead correction
if (allowsErrorCorrections && canDoLookAheadCorrection) {
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
}
@@ -445,7 +442,7 @@ void Suggest::processTerminalDicNode(
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
return;
}
- if (!dicNode->isTerminalDicNode()) {
+ if (!dicNode->isTerminalWordNode()) {
return;
}
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
@@ -466,7 +463,7 @@ void Suggest::processTerminalDicNode(
/**
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
- * (by the space omission error correction) search path if input dicNode is on a terminal.
+ * (by the space omission error correction) search path if input dicNode is on a terminal node.
*/
void Suggest::processExpandedDicNode(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
@@ -508,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode);
}
-// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
+// Process the node codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@@ -521,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
/**
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
* matches for all possible next letters. Note that just skipping the current letter without any
- * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
+ * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
* the possible *next* letters after the omission to better limit search to plausible omissions.
* Note that apostrophes are handled as omissions.
*/
@@ -608,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
}
/**
- * Weight child dicNode by aligning it to the key
+ * Weight child node by aligning it to the key
*/
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
const int inputSize = traverseSession->getInputSize();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index 7d0d09631..1926b9831 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -37,6 +38,7 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
// Mask for attribute probability, stored on 4 bits inside the flags byte.
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
+const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
@@ -77,6 +79,11 @@ const BigramListReadWriteUtils::BigramFlags
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
break;
}
+ if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
+ return NOT_A_DICT_POS;
+ } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
+ return origin;
+ }
if (isOffsetNegative(flags)) {
return origin - offset;
} else {
@@ -84,4 +91,92 @@ const BigramListReadWriteUtils::BigramFlags
}
}
+// Sets or clears FLAG_ATTRIBUTE_HAS_NEXT in the flags byte of the bigram entry at entryPos.
+// Reads the current flags byte, updates only the has-next bit, and writes the byte back.
+// Returns the result of the buffer write.
+/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
+ BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
+ const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
+ // The read below goes through the raw byte array returned by getBuffer(), so the position is
+ // first made relative to the additional buffer when the entry lives there.
+ int readingPos = entryPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= buffer->getOriginalBufferSize();
+ }
+ BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer->getBuffer(usesAdditionalBuffer), &readingPos);
+ if (hasNext) {
+ bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
+ } else {
+ bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
+ }
+ // The write goes through BufferWithExtendableBuffer and uses the unadjusted entry position.
+ int writingPos = entryPos;
+ return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
+}
+
+// Builds the flags for a new bigram entry pointing at targetPos and writes the entry at
+// *writingPos, advancing *writingPos past the written bytes.
+// Returns false when the flags cannot be created (see createAndGetBigramFlags) or when the
+// write fails.
+/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
+ const bool hasNext, int *const writingPos) {
+ BigramFlags flags;
+ if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
+ return false;
+ }
+ return writeBigramEntry(buffer, flags, targetPos, writingPos);
+}
+
+// Writes one bigram entry (flags byte followed by the target offset field) at *writingPos and
+// advances *writingPos past it. Returns false if either write fails.
+// targetPtNodePos is a PtNode position, not an offset: the offset is computed here relative to
+// the entry. The sign bit in the written flags is recomputed from that offset, so the caller's
+// FLAG_ATTRIBUTE_OFFSET_NEGATIVE bit is ignored.
+// NOTE(review): the declaration in the header names this third parameter "targetOffset", which
+// does not match its use here as a position.
+/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
+ BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
+ const int targetPtNodePos, int *const writingPos) {
+ // Computed before the flags byte is written, so *writingPos is still the entry position.
+ const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
+ const BigramFlags flagsToWrite = (offset < 0) ?
+ (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
+ if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
+ return false;
+ }
+ const uint32_t absOffest = abs(offset);
+ // The width of the offset field is taken from the address-type bits of the flags.
+ const int bigramTargetFieldSize = attributeAddressSize(flags);
+ return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
+ writingPos);
+}
+
+// Computes the flags byte for a bigram entry located at entryPos that targets targetPtNodePos.
+// Returns true and stores the flags in *outBigramFlags if the entry can be encoded; returns
+// false, leaving *outBigramFlags untouched, when the offset does not fit in 24 bits.
+/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
+ const int targetPtNodePos, const int probability, const bool hasNext,
+ BigramFlags *const outBigramFlags) {
+ // Only the bits covered by MASK_ATTRIBUTE_PROBABILITY are kept; higher bits are dropped.
+ BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
+ if (hasNext) {
+ flags |= FLAG_ATTRIBUTE_HAS_NEXT;
+ }
+ const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
+ if (offset < 0) {
+ flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
+ }
+ const uint32_t absOffest = abs(offset);
+ // Pick the smallest address type that can hold the offset. Note that the result of this
+ // selection is overwritten below, so for now its only lasting effect is the size check.
+ if ((absOffest >> 24) != 0) {
+ // Offset is too large.
+ return false;
+ } else if ((absOffest >> 16) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ } else if ((absOffest >> 8) != 0) {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
+ } else {
+ flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
+ }
+ // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
+ // writing.
+ // TODO: Remove following 2 lines and optimize memory space.
+ flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
+ *outBigramFlags = flags;
+ return true;
+}
+
+// Returns the value to store in the offset field of a bigram entry at entryPos.
+// - NOT_A_DICT_POS as the target yields DICT_OFFSET_INVALID.
+// - Otherwise the offset is measured from the byte following the flags field; an offset of
+//   exactly 0 is replaced by DICT_OFFSET_ZERO_OFFSET.
+// The reading side in this file decodes the same two special values back to NOT_A_DICT_POS and
+// to the origin position respectively.
+/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
+ const int entryPos) {
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else {
+ const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
+ if (offset == 0) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ } else {
+ return offset;
+ }
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
index 7e1038300..eabe4e099 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -45,6 +45,34 @@ public:
// Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
+ // Returns the size of the bigram position field, as encoded in the address-type bits of flags.
+ static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
+ return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
+ /* Note: this is a value-dependent optimization of what may probably be
+ more readably written this way:
+ switch (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
+ case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: return 3;
+ default: return 0;
+ }
+ */
+ }
+
+ static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
+ const bool hasNext, const int entryPos);
+
+ // Returns a copy of flags whose probability bits (MASK_ATTRIBUTE_PROBABILITY) are replaced by
+ // the corresponding bits of probability. All other flag bits are preserved, and bits of
+ // probability outside the mask are discarded.
+ static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
+ const int probability) {
+ return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
+ }
+
+ static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
+ const int targetPos, const int probability, const bool hasNext, int *const writingPos);
+
+ static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
+ const int targetOffset, int *const writingPos);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@@ -55,6 +83,11 @@ private:
static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
+ static const int ATTRIBUTE_ADDRESS_SHIFT;
+
+ // Returns true if the bigram entry is valid and put entry flags into out*.
+ static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
+ const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
@@ -62,6 +95,8 @@ private:
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
+
+ static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
new file mode 100644
index 000000000..b1170e251
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
+const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
+
+// Reads the bigram entry at *bigramEntryPos and advances *bigramEntryPos past it.
+// Outputs:
+//   outProbability - probability bits taken from the entry flags.
+//   outHasNext     - whether the entry flags say another entry follows.
+//   outBigramPos   - target position after following bigram links, or NOT_A_DICT_POS when this
+//                    is a decaying dictionary and the probability is not a valid encoded one.
+void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const bigramEntryPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ // The raw byte array is indexed relative to its own start, so positions in the additional
+ // buffer are shifted down for reading and shifted back up before returning.
+ if (usesAdditionalBuffer) {
+ *bigramEntryPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int originalBigramPos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
+ &originalBigramPos, bigramEntryPos);
+ // The decoded target is relative to the same byte array; NOT_A_DICT_POS is left as is.
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
+ if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
+ // This bigram is too weak to output.
+ *outBigramPos = NOT_A_DICT_POS;
+ } else {
+ *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ }
+ if (usesAdditionalBuffer) {
+ *bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+// Advances *bigramListPos past the bigram list that starts there, without decoding targets.
+// Positions in the additional buffer are made relative to that buffer for the raw read and are
+// translated back before returning.
+void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+// Copies the bigram list starting at *fromPos in this policy's buffer into bufferToWrite at
+// *toPos, advancing both positions.
+// Entries whose stored target is NOT_A_DICT_POS, or whose target resolves to NOT_A_DICT_POS
+// after following bigram links, are skipped. *outBigramsCount is reset to 0 and then set to the
+// number of entries written. Returns false if the entry limit is exceeded or a write fails.
+// NOTE(review): on the early "return false" paths *fromPos is left relative to the additional
+// buffer (the translation back at the end is not reached); confirm callers do not rely on
+// *fromPos after a failure.
+bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
+ int *const fromPos, int *const toPos, int *const outBigramsCount) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
+ if (usesAdditionalBuffer) {
+ *fromPos -= mBuffer->getOriginalBufferSize();
+ }
+ *outBigramsCount = 0;
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ int lastWrittenEntryPos = NOT_A_DICT_POS;
+ // Each "continue" below jumps to the loop condition, so the has-next flag of a skipped entry
+ // still decides whether the loop goes on.
+ do {
+ // Guard against a list that never terminates.
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ // The buffer address can be changed after calling buffer writing methods.
+ int originalBigramPos;
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ fromPos);
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ // skip invalid bigram entry.
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ if (bigramPos == NOT_A_DICT_POS) {
+ // Target PtNode has been invalidated.
+ continue;
+ }
+ // Remember where this entry starts so its has-next flag can be cleared after the loop.
+ lastWrittenEntryPos = *toPos;
+ if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
+ BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
+ return false;
+ }
+ (*outBigramsCount)++;
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ // Makes the last entry the terminal of the list. Updates the flags.
+ // This is needed because the copied entry keeps the source's has-next flag even when the
+ // source entries after it were skipped.
+ if (lastWrittenEntryPos != NOT_A_DICT_POS) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
+ lastWrittenEntryPos)) {
+ return false;
+ }
+ }
+ if (usesAdditionalBuffer) {
+ *fromPos += mBuffer->getOriginalBufferSize();
+ }
+ return true;
+}
+
+// Finds useless bigram entries and invalidates them. A bigram entry is useless when its target
+// PtNode has been deleted or is not a valid terminal.
+// Walks the list starting at *bigramListPos, advancing it entry by entry. Entries that remain
+// valid are passed to updateProbabilityForDecay(), and *outValidBigramEntryCount is incremented
+// for each entry that is not removed there. The counter is not reset here.
+// Returns false if the entry limit is exceeded or a write/update fails.
+// NOTE(review): unlike the other list-walking methods in this file, *bigramListPos is not
+// translated back from additional-buffer-relative form before returning; confirm that callers
+// do not use it afterwards.
+bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
+ int *const bigramListPos, int *const outValidBigramEntryCount) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ // Guard against a list that never terminates.
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ // Start of the current entry, captured before the read advances *bigramListPos.
+ int bigramEntryPos = *bigramListPos;
+ int originalBigramPos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
+ bigramListPos);
+ // Writes below go through mBuffer, so the entry position is converted back to an absolute one.
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ // This entry has already been removed.
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ const int bigramTargetNodePos =
+ followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ // NOTE(review): the node is fetched before bigramTargetNodePos is compared against
+ // NOT_A_DICT_POS; confirm the reader tolerates being given NOT_A_DICT_POS.
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
+ if (nodeReader.isDeleted() || !nodeReader.isTerminal()
+ || bigramTargetNodePos == NOT_A_DICT_POS) {
+ // The target is no longer valid terminal. Invalidate the current bigram entry.
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
+ return false;
+ }
+ continue;
+ }
+ bool isRemoved = false;
+ if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
+ &isRemoved)) {
+ return false;
+ }
+ if (!isRemoved) {
+ (*outValidBigramEntryCount) += 1;
+ }
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return true;
+}
+
+// Rewrites the target PtNode position of every valid entry in the bigram list at *bigramListPos
+// using ptNodePositionRelocationMap (used after the placing step in GC). A target that has no
+// mapping is written as NOT_A_DICT_POS. Entries that are already invalid are left untouched.
+// *outBigramEntryCount receives the number of entries visited, invalid ones included.
+// Returns false when the list is longer than the limit or when a write fails.
+bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+        const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+                ptNodePositionRelocationMap, int *const outBigramEntryCount) {
+    typedef DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap RelocationMap;
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+    if (inAdditionalBuffer) {
+        *bigramListPos -= mBuffer->getOriginalBufferSize();
+    }
+    BigramListReadWriteUtils::BigramFlags flags;
+    int visitedEntryCount = 0;
+    do {
+        ++visitedEntryCount;
+        if (visitedEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+            AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+                    visitedEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+            ASSERT(false);
+            return false;
+        }
+        // Position used for rewriting this entry, with the original buffer size added back.
+        int entryWritingPos = *bigramListPos;
+        if (inAdditionalBuffer) {
+            entryWritingPos += mBuffer->getOriginalBufferSize();
+        }
+        int targetPos;
+        // The buffer address can be changed after calling buffer writing methods.
+        BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+                mBuffer->getBuffer(inAdditionalBuffer), &flags, &targetPos, bigramListPos);
+        if (targetPos != NOT_A_DICT_POS) {
+            if (inAdditionalBuffer) {
+                targetPos += mBuffer->getOriginalBufferSize();
+            }
+            const RelocationMap::const_iterator relocation =
+                    ptNodePositionRelocationMap->find(targetPos);
+            const int relocatedTargetPos = (relocation == ptNodePositionRelocationMap->end()) ?
+                    NOT_A_DICT_POS : relocation->second;
+            if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, flags, relocatedTargetPos,
+                    &entryWritingPos)) {
+                return false;
+            }
+        }
+    } while (BigramListReadWriteUtils::hasNext(flags));
+    *outBigramEntryCount = visitedEntryCount;
+    return true;
+}
+
+// Adds the bigram (bigramTargetPos, probability) to the list at *bigramListPos.
+// If an entry whose followed target equals bigramTargetPos exists, it is rewritten in place and
+// *outAddedNewBigram is set to false. Otherwise the current last entry gets its hasNext flag set
+// and a new entry is written right after it with *outAddedNewBigram set to true.
+// Returns false when the list is longer than the limit or when a write fails.
+bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
+        const int probability, int *const bigramListPos, bool *const outAddedNewBigram) {
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+    if (inAdditionalBuffer) {
+        *bigramListPos -= mBuffer->getOriginalBufferSize();
+    }
+    BigramListReadWriteUtils::BigramFlags flags;
+    int visitedEntryCount = 0;
+    do {
+        ++visitedEntryCount;
+        if (visitedEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+            AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+                    visitedEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+            ASSERT(false);
+            return false;
+        }
+        // Position of the entry about to be read, with the original buffer size added back so
+        // that it can be handed to the writing helpers.
+        int currentEntryPos = *bigramListPos;
+        if (inAdditionalBuffer) {
+            currentEntryPos += mBuffer->getOriginalBufferSize();
+        }
+        int storedTargetPos;
+        // The buffer address can be changed after calling buffer writing methods.
+        BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+                mBuffer->getBuffer(inAdditionalBuffer), &flags, &storedTargetPos,
+                bigramListPos);
+        if (storedTargetPos != NOT_A_DICT_POS && inAdditionalBuffer) {
+            storedTargetPos += mBuffer->getOriginalBufferSize();
+        }
+        const int resolvedTargetPos =
+                followBigramLinkAndGetCurrentBigramPtNodePos(storedTargetPos);
+        if (resolvedTargetPos == bigramTargetPos) {
+            // Update this bigram entry.
+            *outAddedNewBigram = false;
+            const int storedProbability =
+                    BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+            int probabilityToWrite = probability;
+            if (mIsDecayingDict) {
+                probabilityToWrite = ForgettingCurveUtils::getUpdatedEncodedProbability(
+                        storedProbability, probability);
+            }
+            // The stored (not the followed) target position is written back.
+            return BigramListReadWriteUtils::writeBigramEntry(mBuffer,
+                    BigramListReadWriteUtils::setProbabilityInFlags(flags, probabilityToWrite),
+                    storedTargetPos, &currentEntryPos);
+        }
+        if (!BigramListReadWriteUtils::hasNext(flags)) {
+            // The current last entry is found.
+            // First, update the flags of the last entry.
+            if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */,
+                    currentEntryPos)) {
+                *outAddedNewBigram = false;
+                return false;
+            }
+            if (inAdditionalBuffer) {
+                *bigramListPos += mBuffer->getOriginalBufferSize();
+            }
+            // Then, add a new entry after the last entry.
+            *outAddedNewBigram = true;
+            return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
+        }
+    } while (BigramListReadWriteUtils::hasNext(flags));
+    // We return directly from the while loop.
+    ASSERT(false);
+    return false;
+}
+
+// Writes a brand-new bigram entry for bigramTargetPos at *writingPos. For decaying dictionaries
+// the probability is first encoded through ForgettingCurveUtils with NOT_A_PROBABILITY as the
+// previous value. Returns the result of the underlying write.
+bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
+        int *const writingPos) {
+    int probabilityToWrite = probability;
+    if (mIsDecayingDict) {
+        probabilityToWrite = ForgettingCurveUtils::getUpdatedEncodedProbability(
+                NOT_A_PROBABILITY, probability);
+    }
+    // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
+    return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
+            probabilityToWrite, false /* hasNext */, writingPos);
+}
+
+// Searches the bigram list at bigramListPos for the entry whose followed target position equals
+// bigramTargetPos and marks it invalid by overwriting its target with NOT_A_DICT_POS.
+// Returns the result of that write, or false when no such entry exists or the list is longer
+// than the limit.
+bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
+    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
+    int readingPos = bigramListPos;
+    if (inAdditionalBuffer) {
+        readingPos -= mBuffer->getOriginalBufferSize();
+    }
+    BigramListReadWriteUtils::BigramFlags flags;
+    int visitedEntryCount = 0;
+    do {
+        ++visitedEntryCount;
+        if (visitedEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+            AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+                    visitedEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+            ASSERT(false);
+            return false;
+        }
+        // Remember where this entry starts before the read advances readingPos.
+        int entryWritingPos = readingPos;
+        int storedTargetPos;
+        // The buffer address can be changed after calling buffer writing methods.
+        BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+                mBuffer->getBuffer(inAdditionalBuffer), &flags, &storedTargetPos, &readingPos);
+        if (inAdditionalBuffer) {
+            entryWritingPos += mBuffer->getOriginalBufferSize();
+            if (storedTargetPos != NOT_A_DICT_POS) {
+                storedTargetPos += mBuffer->getOriginalBufferSize();
+            }
+        }
+        if (followBigramLinkAndGetCurrentBigramPtNodePos(storedTargetPos) == bigramTargetPos) {
+            // Target entry is found. Write an invalid target position to mark the bigram
+            // invalid.
+            return BigramListReadWriteUtils::writeBigramEntry(mBuffer, flags,
+                    NOT_A_DICT_POS /* targetOffset */, &entryWritingPos);
+        }
+    } while (BigramListReadWriteUtils::hasNext(flags));
+    return false;
+}
+
+// Starting from originalBigramPos, follows bigram links from PtNode to PtNode until a PtNode
+// without a link is reached and returns its position. Returns NOT_A_DICT_POS when the input is
+// NOT_A_DICT_POS or when more than CONTINUING_BIGRAM_LINK_COUNT_LIMIT links were followed.
+int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
+        const int originalBigramPos) const {
+    if (NOT_A_DICT_POS == originalBigramPos) {
+        return NOT_A_DICT_POS;
+    }
+    DynamicPatriciaTrieNodeReader reader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+    int resolvedPos = originalBigramPos;
+    reader.fetchNodeInfoInBufferFromPtNodePos(resolvedPos);
+    for (int followedLinkCount = 1; reader.getBigramLinkedNodePos() != NOT_A_DICT_POS;
+            ++followedLinkCount) {
+        resolvedPos = reader.getBigramLinkedNodePos();
+        reader.fetchNodeInfoInBufferFromPtNodePos(resolvedPos);
+        if (followedLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
+            // Too many hops: treat the chain as broken instead of looping on.
+            AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
+            ASSERT(false);
+            return NOT_A_DICT_POS;
+        }
+    }
+    return resolvedPos;
+}
+
+// Applies decay to the bigram entry at *bigramEntryPos. Nothing is written for dictionaries that
+// are not decaying. Otherwise the entry is rewritten with the new encoded probability, or, when
+// that probability is no longer valid, invalidated by writing NOT_A_DICT_POS as its target, in
+// which case *outRemoved is set to true. Returns false only when a write fails.
+bool DynamicBigramListPolicy::updateProbabilityForDecay(
+        const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
+        int *const bigramEntryPos, bool *const outRemoved) const {
+    *outRemoved = false;
+    if (!mIsDecayingDict) {
+        return true;
+    }
+    const int currentProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
+    const int decayedProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
+            currentProbability, mHeaderPolicy);
+    if (!ForgettingCurveUtils::isValidEncodedProbability(decayedProbability)) {
+        // Remove current bigram entry.
+        *outRemoved = true;
+        return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+                NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos);
+    }
+    // Write new probability.
+    return BigramListReadWriteUtils::writeBigramEntry(mBuffer,
+            BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, decayedProbability),
+            targetPtNodePos, bigramEntryPos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
new file mode 100644
index 000000000..0504b59d5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryHeaderStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This is a dynamic version of BigramListPolicy and supports an additional buffer.
+ * Bigram list positions handed to the list-walking methods may point into either the original
+ * buffer or the additional buffer of the BufferWithExtendableBuffer given at construction.
+ */
+class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+    // The pointers are stored as given; this class does not delete them.
+    DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
+            BufferWithExtendableBuffer *const buffer,
+            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+            const bool isDecayingDict)
+            : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
+              mIsDecayingDict(isDecayingDict) {}
+
+    ~DynamicBigramListPolicy() {}
+
+    void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+            int *const bigramEntryPos) const;
+
+    void skipAllBigrams(int *const bigramListPos) const;
+
+    // Copies bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
+    // bufferToWrite and advances these positions past the bigram lists. This method skips
+    // invalid bigram entries and writes the valid bigram entry count to outBigramsCount.
+    bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
+            int *const toPos, int *const outBigramsCount) const;
+
+    // Invalidates entries whose target PtNode is deleted or is not a valid terminal and, for
+    // decaying dictionaries, updates the probability of the remaining entries. The number of
+    // entries that are kept is added to outValidBigramEntryCount.
+    bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos,
+            int *const outValidBigramEntryCount);
+
+    // Rewrites bigram target positions using ptNodePositionRelocationMap. Targets that are not
+    // in the map are written as NOT_A_DICT_POS. outBigramEntryCount receives the number of
+    // entries in the list, including invalid ones.
+    bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+            const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+                    ptNodePositionRelocationMap, int *const outBigramEntryCount);
+
+    // Updates the entry for bigramTargetPos when the list already has one, otherwise appends a
+    // new entry after the last one. outAddedNewBigram tells which of the two happened.
+    bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
+            int *const bigramListPos, bool *const outAddedNewBigram);
+
+    // Writes a new bigram entry without the hasNext flag at writingPos.
+    bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
+            int *const writingPos);
+
+    // Marks the entry for bigramTargetPos as invalid. Returns false when bigramTargetPos is not
+    // found in the list or when the entry could not be rewritten.
+    bool removeBigram(const int bigramListPos, const int bigramTargetPos);
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
+
+    // Upper bounds used to stop walking bigram links / bigram lists that look broken.
+    static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
+    static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
+
+    const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
+    BufferWithExtendableBuffer *const mBuffer;
+    const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
+    const bool mIsDecayingDict;
+
+    // Follow bigram link and return the position of bigram target PtNode that is currently valid.
+    int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
+
+    // Updates the probability of a bigram entry for decaying dictionaries, or invalidates the
+    // entry and sets outRemoved when the new probability is not valid.
+    bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
+            const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
deleted file mode 100644
index cd2243025..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
-
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const {
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
- if (outBigramPos) {
- // Lookup target PtNode position.
- *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
- bigramEntry.getTargetTerminalId());
- }
- if (outProbability) {
- if (bigramEntry.hasHistoricalInfo()) {
- *outProbability =
- ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
- } else {
- *outProbability = bigramEntry.getProbability();
- }
- }
- if (outHasNext) {
- *outHasNext = bigramEntry.hasNext();
- }
-}
-
-bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
- const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
- if (outAddedNewEntry) {
- *outAddedNewEntry = false;
- }
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Updating PtNode doesn't have a bigram list.
- // Create new bigram list.
- if (!mBigramDictContent->createNewBigramList(terminalId)) {
- return false;
- }
- const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
- newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
- newProbability, timestamp);
- // Write an entry.
- const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
- return false;
- }
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- return true;
- }
-
- const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
- if (entryPosToUpdate != NOT_A_DICT_POS) {
- // Overwrite existing entry.
- const BigramEntry originalBigramEntry =
- mBigramDictContent->getBigramEntry(entryPosToUpdate);
- if (!originalBigramEntry.isValid()) {
- // Reuse invalid entry.
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- }
- const BigramEntry updatedBigramEntry =
- originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &updatedBigramEntry, newProbability, timestamp);
- return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
- }
-
- // Add new entry to the bigram list.
- // Create new bigram list.
- if (!mBigramDictContent->createNewBigramList(terminalId)) {
- return false;
- }
- // Write new entry at a head position of the bigram list.
- int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &newBigramEntry, newProbability, timestamp);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
- return false;
- }
- if (outAddedNewEntry) {
- *outAddedNewEntry = true;
- }
- // Append existing entries by copying.
- return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
-}
-
-bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list doesn't exist.
- return false;
- }
- const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
- if (entryPosToUpdate == NOT_A_DICT_POS) {
- // Bigram entry doesn't exist.
- return false;
- }
- const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
- if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
- // Bigram entry doesn't exist.
- return false;
- }
- // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
- const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
- return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
-}
-
-bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
- int *const outBigramCount) {
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list doesn't exist.
- return true;
- }
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const int entryPos = readingPos;
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (!bigramEntry.isValid()) {
- continue;
- }
- const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
- bigramEntry.getTargetTerminalId());
- if (targetPtNodePos == NOT_A_DICT_POS) {
- // Invalidate bigram entry.
- const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
- if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
- return false;
- }
- } else if (bigramEntry.hasHistoricalInfo()) {
- const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
- bigramEntry.getHistoricalInfo());
- if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
- const BigramEntry updatedBigramEntry =
- bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
- if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
- return false;
- }
- *outBigramCount += 1;
- } else {
- // Remove entry.
- const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
- if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
- return false;
- }
- }
- } else {
- *outBigramCount += 1;
- }
- }
- return true;
-}
-
-int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
- const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list doesn't exist.
- return 0;
- }
- int bigramCount = 0;
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (bigramEntry.isValid()) {
- bigramCount++;
- }
- }
- return bigramCount;
-}
-
-int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
- const int bigramListPos) const {
- bool hasNext = true;
- int invalidEntryPos = NOT_A_DICT_POS;
- int readingPos = bigramListPos;
- while (hasNext) {
- const int entryPos = readingPos;
- const BigramEntry bigramEntry =
- mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
- // Entry with same target is found.
- return entryPos;
- } else if (!bigramEntry.isValid()) {
- // Invalid entry that can be reused is found.
- invalidEntryPos = entryPos;
- }
- }
- return invalidEntryPos;
-}
-
-const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
- const BigramEntry *const originalBigramEntry, const int newProbability,
- const int timestamp) const {
- // TODO: Consolidate historical info and probability.
- if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
- const HistoricalInfo updatedHistoricalInfo =
- ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
- return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
- } else {
- return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
deleted file mode 100644
index 5b6c5a173..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
-#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
-
-namespace latinime {
-
-class BigramDictContent;
-class HeaderPolicy;
-class TerminalPositionLookupTable;
-
-class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
- public:
- Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
- const TerminalPositionLookupTable *const terminalPositionLookupTable,
- const HeaderPolicy *const headerPolicy)
- : mBigramDictContent(bigramDictContent),
- mTerminalPositionLookupTable(terminalPositionLookupTable),
- mHeaderPolicy(headerPolicy) {}
-
- void getNextBigram(int *const outBigramPos, int *const outProbability,
- bool *const outHasNext, int *const bigramEntryPos) const;
-
- void skipAllBigrams(int *const pos) const {
- // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
- }
-
- bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
- const int timestamp, bool *const outAddedNewEntry);
-
- bool removeEntry(const int terminalId, const int targetTerminalId);
-
- bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
- int *const outBigramCount);
-
- int getBigramEntryConut(const int terminalId);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
-
- int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
-
- const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
- const int newProbability, const int timestamp) const;
-
- BigramDictContent *const mBigramDictContent;
- const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
- const HeaderPolicy *const mHeaderPolicy;
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
new file mode 100644
index 000000000..ff80dd2f6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+// Opens the dictionary at path and returns a structure policy that matches the detected format
+// version. Returns 0 when the buffer cannot be opened or the format is unknown.
+/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
+        ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+                const int size, const bool isUpdatable) {
+    // The buffer allocated in MmappedBuffer::openBuffer() will be freed in the destructor of
+    // the impl classes of DictionaryStructureWithBufferPolicy.
+    const MmappedBuffer *const dictBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
+            isUpdatable);
+    if (!dictBuffer) {
+        return 0;
+    }
+    DictionaryStructureWithBufferPolicy *structurePolicy = 0;
+    switch (FormatUtils::detectFormatVersion(dictBuffer->getBuffer(),
+            dictBuffer->getBufferSize())) {
+        case FormatUtils::VERSION_2:
+            structurePolicy = new PatriciaTriePolicy(dictBuffer);
+            break;
+        case FormatUtils::VERSION_3:
+            structurePolicy = new DynamicPatriciaTriePolicy(dictBuffer);
+            break;
+        default:
+            // No policy takes the buffer over in this case, so release it here.
+            AKLOGE("DICT: dictionary format is unknown, bad magic number");
+            delete dictBuffer;
+            ASSERT(false);
+            break;
+    }
+    return structurePolicy;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
index 45ab52931..8cebc3b16 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
@@ -21,27 +21,16 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
- static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
- const int size, const bool isUpdatable);
+ static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
+ const char *const path, const int bufOffset, const int size, const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
-
- static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- newPolicyforDirectoryDict(const char *const path, const bool isUpdatable);
-
- static DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- newPolicyforFileDict(const char *const path, const int bufOffset, const int size);
-
- static void getHeaderFilePathInDictDir(const char *const dirPath,
- const int outHeaderFileBufSize, char *const outHeaderFilePath);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
new file mode 100644
index 000000000..5724c5d88
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+
+// Refreshes the stored probability of a terminal PtNode (decaying dictionaries only), detaches
+// the children of a terminal whose children are all useless, and marks the PtNode itself as
+// deleted when it is useless. Returns false as soon as a write or the deletion fails.
+bool DynamicPatriciaTrieGcEventListeners
+        ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+        ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+                const int *const nodeCodePoints) {
+    const bool isTerminal = node->isTerminal();
+    // A PtNode is useless when it is not a terminal and has no useful children.
+    bool shouldDelete = !isTerminal;
+    if (isTerminal && mIsDecayingDict) {
+        // Re-encode the probability and overwrite the stored value in place.
+        const int probabilityToSave = ForgettingCurveUtils::getEncodedProbabilityToSave(
+                node->getProbability(), mHeaderPolicy);
+        int probabilityFieldPos = node->getProbabilityFieldPos();
+        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
+                mBuffer, probabilityToSave, &probabilityFieldPos)) {
+            return false;
+        }
+        // A terminal whose new probability is not valid becomes a candidate for deletion.
+        shouldDelete = !ForgettingCurveUtils::isValidEncodedProbability(probabilityToSave);
+    }
+    if (mChildrenValue > 0) {
+        // At least one child is still useful, so this PtNode has to stay.
+        shouldDelete = false;
+    } else if (isTerminal) {
+        // All children are useless; drop the link to the children PtNode array.
+        int childrenPosFieldPos = node->getChildrenPosFieldPos();
+        if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+                mBuffer, NOT_A_DICT_POS /* childrenPosition */, &childrenPosFieldPos)) {
+            return false;
+        }
+    }
+    if (shouldDelete) {
+        // Current PtNode is no longer needed. Mark it as deleted.
+        return mWritingHelper->markNodeAsDeleted(node);
+    }
+    // The PtNode survives: report it to the enclosing level and count it if it is a word.
+    mValueStack.back() += 1;
+    if (isTerminal) {
+        mValidUnigramCount += 1;
+    }
+    return true;
+}
+
+// Updates the bigram list of a PtNode that has not been deleted and adds the number of entries
+// reported by the bigram policy to mValidBigramEntryCount. Returns false if the update fails.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+        ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+                const int *const nodeCodePoints) {
+    if (node->isDeleted()) {
+        // Deleted PtNodes are left untouched.
+        return true;
+    }
+    int bigramsPos = node->getBigramsPos();
+    if (bigramsPos == NOT_A_DICT_POS) {
+        // No bigram list is attached to this PtNode.
+        return true;
+    }
+    int entryCount = 0;
+    if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&bigramsPos,
+            &entryCount)) {
+        return false;
+    }
+    mValidBigramEntryCount += entryCount;
+    return true;
+}
+
+// Called at the head of a PtNode array: records where the array is relocated to in the new
+// buffer and writes a placeholder PtNode array size there.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+        ::onDescend(const int ptNodeArrayPos) {
+    mValidPtNodeCount = 0;
+    const int relocatedArrayPos = mBufferToWrite->getTailPosition();
+    // Remember the mapping from the old array position to the new one.
+    mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
+            DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
+                    ptNodeArrayPos, relocatedArrayPos));
+    // A dummy size is written because arrays can have a forward link or needless PtNodes.
+    // The field is overwritten in onReadingPtNodeArrayTail() with the actual PtNode count.
+    mPtNodeArraySizeFieldPos = relocatedArrayPos;
+    int writingPos = relocatedArrayPos;
+    return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+            mBufferToWrite, 0 /* arraySize */, &writingPos);
+}
+
+// Called at the tail of a PtNode array: terminates the array in the new buffer and then fills
+// in the size field that onDescend() reserved with the number of PtNodes actually written.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+        ::onReadingPtNodeArrayTail() {
+    int tailPos = mBufferToWrite->getTailPosition();
+    // The array terminal is a forward link that points nowhere.
+    const bool isTerminalWritten =
+            DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+                    mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &tailPos);
+    if (!isTerminalWritten) {
+        return false;
+    }
+    // Overwrite the dummy size with the actual PtNode array size.
+    if (DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+            mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
+        return true;
+    }
+    return false;
+}
+
+// Copies a PtNode that has not been deleted to the tail of the new buffer and memorizes the
+// mapping from its old head position to the new one. A deleted PtNode is not copied; it is
+// memorized as relocated to NOT_A_DICT_POS.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+        ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+                const int *const nodeCodePoints) {
+    const bool isDeleted = node->isDeleted();
+    // The tail position is only queried for PtNodes that are going to be written.
+    int writingPos = isDeleted ? NOT_A_DICT_POS : mBufferToWrite->getTailPosition();
+    mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+            DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+                    node->getHeadPos(), writingPos));
+    if (isDeleted) {
+        // Nothing to write for a deleted PtNode.
+        return true;
+    }
+    ++mValidPtNodeCount;
+    // Writes current PtNode.
+    return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
+            node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
+            node->getProbability(), &writingPos);
+}
+
+// Rewrites the position fields of a PtNode using the relocation maps: the parent position, the
+// children PtNode array position and the bigram target positions. A position that has no entry
+// in the corresponding map is written back unchanged. Terminals are counted in mUnigramCount
+// and bigram entries in mBigramCount. Returns false as soon as any write fails.
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+        ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+                const int *const nodeCodePoints) {
+    // Updates parent position.
+    int parentPos = node->getParentPos();
+    if (parentPos != NOT_A_DICT_POS) {
+        DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+                mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
+        if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+            parentPos = it->second;
+        }
+    }
+    // The parent offset field is located right after the flags field.
+    int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+    // Write updated parent offset.
+    if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
+            parentPos, node->getHeadPos(), &writingPos)) {
+        return false;
+    }
+
+    // Updates children position.
+    int childrenPos = node->getChildrenPos();
+    if (childrenPos != NOT_A_DICT_POS) {
+        DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
+                mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
+        if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+            childrenPos = it->second;
+        }
+    }
+    writingPos = node->getChildrenPosFieldPos();
+    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
+            childrenPos, &writingPos)) {
+        return false;
+    }
+
+    // Updates bigram target PtNode positions in the bigram list.
+    int bigramsPos = node->getBigramsPos();
+    if (bigramsPos != NOT_A_DICT_POS) {
+        // Initialized so that mBigramCount never receives an indeterminate value, matching
+        // TraversePolicyToUpdateBigramProbability::onVisitingPtNode().
+        int bigramEntryCount = 0;
+        if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
+                &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
+            return false;
+        }
+        mBigramCount += bigramEntryCount;
+    }
+    if (node->isTerminal()) {
+        mUnigramCount++;
+    }
+
+    return true;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index d8867754d..9755120b0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -14,32 +14,37 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
-#define LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/hash_map_compat.h"
namespace latinime {
-class PtNodeParams;
+class DictionaryHeaderStructurePolicy;
-class DynamicPtGcEventListeners {
+class DynamicPatriciaTrieGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
// TODO: Concatenate non-terminal PtNodes.
class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- : public DynamicPtReadingHelper::TraversingEventListener {
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- PtNodeWriter *const ptNodeWriter)
- : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
+ : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
+ mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -61,7 +66,8 @@ class DynamicPtGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
int getValidUnigramCount() const {
return mValidUnigramCount;
@@ -71,7 +77,10 @@ class DynamicPtGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
- PtNodeWriter *const mPtNodeWriter;
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ BufferWithExtendableBuffer *const mBuffer;
+ const bool mIsDecayingDict;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
@@ -80,10 +89,11 @@ class DynamicPtGcEventListeners {
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
class TraversePolicyToUpdateBigramProbability
- : public DynamicPtReadingHelper::TraversingEventListener {
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateBigramProbability(PtNodeWriter *const ptNodeWriter)
- : mPtNodeWriter(ptNodeWriter), mValidBigramEntryCount(0) {}
+ TraversePolicyToUpdateBigramProbability(
+ DynamicBigramListPolicy *const bigramPolicy)
+ : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
@@ -91,7 +101,8 @@ class DynamicPtGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;
@@ -100,17 +111,19 @@ class DynamicPtGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
- PtNodeWriter *const mPtNodeWriter;
+ DynamicBigramListPolicy *const mBigramPolicy;
int mValidBigramEntryCount;
};
class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- : public DynamicPtReadingHelper::TraversingEventListener {
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
- PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite,
- PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
- : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite),
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
@@ -120,24 +133,31 @@ class DynamicPtGcEventListeners {
bool onReadingPtNodeArrayTail();
- bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
- PtNodeWriter *const mPtNodeWriter;
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBufferToWrite;
- PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap;
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
int mValidPtNodeCount;
int mPtNodeArraySizeFieldPos;
};
class TraversePolicyToUpdateAllPositionFields
- : public DynamicPtReadingHelper::TraversingEventListener {
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateAllPositionFields(PtNodeWriter *const ptNodeWriter,
- const PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap)
- : mPtNodeWriter(ptNodeWriter),
+ TraversePolicyToUpdateAllPositionFields(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ DynamicBigramListPolicy *const bigramPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
+ mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
mBigramCount(0) {};
@@ -147,7 +167,8 @@ class DynamicPtGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
- bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
int getUnigramCount() const {
return mUnigramCount;
@@ -160,14 +181,17 @@ class DynamicPtGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
- PtNodeWriter *const mPtNodeWriter;
- const PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap;
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
int mUnigramCount;
int mBigramCount;
};
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtGcEventListeners);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H */
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
new file mode 100644
index 000000000..2fa3111d3
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Reads the PtNode at ptNodePos from the dictionary buffer and stores its fields in the members
+// of this reader. When outCodePoints is non-null the code points are read into it with
+// maxCodePointCount as the limit; otherwise they are skipped. When the PtNode is flagged as
+// moved, the destination PtNode is read by calling this method again. On an out-of-range
+// position every member is invalidated.
+void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
+ if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
+ ptNodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ invalidatePtNodeInfo();
+ return;
+ }
+ // Pick the buffer that contains ptNodePos. pos is an offset into dictBuf, so for the
+ // additional buffer the size of the original buffer is subtracted.
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ int pos = ptNodePos;
+ mHeadPos = ptNodePos;
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
+ if (outCodePoints != 0) {
+ mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
+ dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
+ } else {
+ // The caller does not want the code points; only advance pos past them.
+ mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
+ dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
+ }
+ if (isTerminal()) {
+ // Field positions kept in members are positions in the whole dictionary, so the original
+ // buffer size is added back when reading from the additional buffer.
+ mProbabilityFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
+ } else {
+ mProbabilityFieldPos = NOT_A_DICT_POS;
+ mProbability = NOT_A_PROBABILITY;
+ }
+ mChildrenPosFieldPos = pos;
+ if (usesAdditionalBuffer) {
+ mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
+ }
+ mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
+ dictBuf, &pos);
+ if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
+ mChildrenPos += mBuffer->getOriginalBufferSize();
+ }
+ // mSiblingPos is assigned near the end of this method, before the call for a moved PtNode's
+ // destination. So, when the caller reset it to NOT_A_DICT_POS, this branch runs only for the
+ // PtNode that was originally requested.
+ if (mSiblingPos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
+ mBigramLinkedNodePos = mChildrenPos;
+ } else {
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ }
+ }
+ // From here on pos is a position in the whole dictionary again; it is stored as the
+ // shortcut/bigram position and handed to the shortcut and bigram policies.
+ if (usesAdditionalBuffer) {
+ pos += mBuffer->getOriginalBufferSize();
+ }
+ if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
+ mShortcutPos = pos;
+ mShortcutsPolicy->skipAllShortcuts(&pos);
+ } else {
+ mShortcutPos = NOT_A_DICT_POS;
+ }
+ if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
+ mBigramPos = pos;
+ mBigramsPolicy->skipAllBigrams(&pos);
+ } else {
+ mBigramPos = NOT_A_DICT_POS;
+ }
+ // Update siblingPos if needed.
+ if (mSiblingPos == NOT_A_DICT_POS) {
+ // Sibling position is the tail position of current node.
+ mSiblingPos = pos;
+ }
+ // Read destination node if the read node is a moved node.
+ if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
+ // The destination position is stored at the same place as the parent position.
+ // NOTE(review): nothing here bounds the length of a chain of moved PtNodes; presumably a
+ // well-formed dictionary never contains a cycle - confirm.
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
+ outCodePoints);
+ }
+}
+
+// Resets every piece of PtNode information held by this reader to its "not available" value.
+void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
+    // Flags and the code point count fall back to zero.
+    mFlags = 0;
+    mCodePointCount = 0;
+    mProbability = NOT_A_PROBABILITY;
+    // Every position becomes NOT_A_DICT_POS.
+    mHeadPos = mParentPos = NOT_A_DICT_POS;
+    mProbabilityFieldPos = mChildrenPosFieldPos = NOT_A_DICT_POS;
+    mChildrenPos = mBigramLinkedNodePos = NOT_A_DICT_POS;
+    mShortcutPos = mBigramPos = mSiblingPos = NOT_A_DICT_POS;
+}
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
new file mode 100644
index 000000000..3b36d425f
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DictionaryBigramsStructurePolicy;
+class DictionaryShortcutsStructurePolicy;
+
+/*
+ * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
+ * node and reads node attributes.
+ *
+ * One of the fetch methods loads a PtNode from the buffer into the members of this object; the
+ * getters then return the values of the PtNode that was fetched last.
+ */
+class DynamicPatriciaTrieNodeReader {
+ public:
+ // Keeps the given pointers (no copy is made) and starts with every PtNode field set to its
+ // "not available" value.
+ DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
+ : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
+ mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mSiblingPos(NOT_A_DICT_POS) {}
+
+ ~DynamicPatriciaTrieNodeReader() {}
+
+ // Reads PtNode information from dictionary buffer and updates members with the information.
+ // No output array is passed, so the code points of the PtNode are not returned.
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
+ fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
+ 0 /* maxCodePointCount */, 0 /* outCodePoints */);
+ }
+
+ // Same as above, but also passes outCodePoints and maxCodePointCount on to the reading
+ // routine. The sibling and bigram linked node positions are cleared before reading.
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
+ const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
+ mSiblingPos = NOT_A_DICT_POS;
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
+ }
+
+ // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
+ }
+
+ // Flags
+ AK_FORCE_INLINE bool isDeleted() const {
+ return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
+ }
+
+ // True when a children position is set, i.e. it is not NOT_A_DICT_POS.
+ AK_FORCE_INLINE bool hasChildren() const {
+ return mChildrenPos != NOT_A_DICT_POS;
+ }
+
+ AK_FORCE_INLINE bool isTerminal() const {
+ return PatriciaTrieReadingUtils::isTerminal(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isBlacklisted() const {
+ return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
+ }
+
+ AK_FORCE_INLINE bool isNotAWord() const {
+ return PatriciaTrieReadingUtils::isNotAWord(mFlags);
+ }
+
+ // Parent node position
+ AK_FORCE_INLINE int getParentPos() const {
+ return mParentPos;
+ }
+
+ // Number of code points
+ AK_FORCE_INLINE uint8_t getCodePointCount() const {
+ return mCodePointCount;
+ }
+
+ // Probability
+ // Position of the probability field in the buffer.
+ AK_FORCE_INLINE int getProbabilityFieldPos() const {
+ return mProbabilityFieldPos;
+ }
+
+ AK_FORCE_INLINE int getProbability() const {
+ return mProbability;
+ }
+
+ // Children PtNode array position
+ // Position of the field that stores the children position.
+ AK_FORCE_INLINE int getChildrenPosFieldPos() const {
+ return mChildrenPosFieldPos;
+ }
+
+ AK_FORCE_INLINE int getChildrenPos() const {
+ return mChildrenPos;
+ }
+
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
+ // Shortcutlist position
+ AK_FORCE_INLINE int getShortcutPos() const {
+ return mShortcutPos;
+ }
+
+ // Bigrams position
+ AK_FORCE_INLINE int getBigramsPos() const {
+ return mBigramPos;
+ }
+
+ // Sibling node position
+ AK_FORCE_INLINE int getSiblingNodePos() const {
+ return mSiblingPos;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
+
+ // Buffer and policies handed to the constructor; this class only stores the pointers.
+ const BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
+ const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
+ // Information of the PtNode that was fetched last.
+ int mHeadPos;
+ DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
+ int mParentPos;
+ uint8_t mCodePointCount;
+ int mProbabilityFieldPos;
+ int mProbability;
+ int mChildrenPosFieldPos;
+ int mChildrenPos;
+ int mBigramLinkedNodePos;
+ int mShortcutPos;
+ int mBigramPos;
+ int mSiblingPos;
+
+ // Reads the PtNode at ptNodePos into the members above.
+ void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
+ const int maxCodePointCount, int *const outCodePoints);
+
+ // Resets the PtNode information members to their "not available" values.
+ void invalidatePtNodeInfo();
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
new file mode 100644
index 000000000..495b146c2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+
+#include "defines.h"
+#include "suggest/core/dicnode/dic_node.h"
+#include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+ // Note that there are corresponding definitions of these query strings on the Java side, in
+ // BinaryDictionaryTests and BinaryDictionaryDecayingTests.
+ const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+ const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+ const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
+ const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
+ const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
+ "SET_NEEDS_TO_DECAY_FOR_TESTING";
+ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
+ const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+ DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; // Updates are refused at or above this tail position.
+
+ // Walks the children PtNode array of dicNode and calls pushLeavingChild() on childDicNodes once
+ // for every PtNode that is read. Does nothing when dicNode has no children.
+ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+         DicNodeVector *const childDicNodes) const {
+     if (!dicNode->hasChildren()) {
+         return;
+     }
+     DynamicPatriciaTrieReadingHelper helper(&mBufferWithExtendableBuffer,
+             getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+     helper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
+     const DynamicPatriciaTrieNodeReader *const reader = helper.getNodeReader();
+     for (; !helper.isEnd(); helper.readNextSiblingNode()) {
+         const bool isLiveTerminal = reader->isTerminal() && !reader->isDeleted();
+         bool treatAsTerminal = isLiveTerminal;
+         if (isLiveTerminal && mHeaderPolicy.isDecayingDict()) {
+             // In a decaying dictionary a terminal PtNode may evaluate to NOT_A_PROBABILITY; such
+             // a PtNode must not be handed out as a valid terminal DicNode.
+             const int probability = getProbability(reader->getProbability(), NOT_A_PROBABILITY);
+             treatAsTerminal = (probability != NOT_A_PROBABILITY);
+         }
+         const bool isBlacklistedOrNotAWord = reader->isBlacklisted() || reader->isNotAWord();
+         childDicNodes->pushLeavingChild(dicNode, reader->getHeadPos(), reader->getChildrenPos(),
+                 reader->getProbability(), treatAsTerminal, reader->hasChildren(),
+                 isBlacklistedOrNotAWord, reader->getCodePointCount(),
+                 helper.getMergedNodeCodePoints());
+     }
+ }
+
+ // Rebuilds the word ending at ptNodePos by following parent links from that PtNode.
+ // On success the code points are written to outCodePoints in word order, the probability of the
+ // terminal PtNode is stored in *outUnigramProbability, and the code point count is returned.
+ // On any failure *outUnigramProbability is set to NOT_A_PROBABILITY and 0 is returned.
+ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
+         const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
+         int *const outUnigramProbability) const {
+     // Parent links are followed starting at the terminal, so code points are collected back to
+     // front and flipped at the end.
+     int reversedWord[maxCodePointCount];
+     DynamicPatriciaTrieReadingHelper helper(&mBufferWithExtendableBuffer,
+             getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+     helper.initWithPtNodePos(ptNodePos);
+     if (!helper.isValidTerminalNode()) {
+         // ptNodePos does not point at a valid terminal PtNode.
+         *outUnigramProbability = NOT_A_PROBABILITY;
+         return 0;
+     }
+     // Remember the probability of the terminal before moving away from it.
+     *outUnigramProbability = helper.getNodeReader()->getProbability();
+     for (; !helper.isEnd(); helper.readParentNode()) {
+         if (helper.getTotalCodePointCount() > maxCodePointCount) {
+             // More code points than the caller allows: not a valid terminal position.
+             *outUnigramProbability = NOT_A_PROBABILITY;
+             return 0;
+         }
+         helper.fetchMergedNodeCodePointsInReverseOrder(
+                 helper.getPrevTotalCodePointCount(), reversedWord);
+     }
+     if (helper.isError()) {
+         // Either the position or the dictionary itself is invalid.
+         *outUnigramProbability = NOT_A_PROBABILITY;
+         return 0;
+     }
+     const int wordLength = helper.getTotalCodePointCount();
+     for (int src = wordLength - 1, dst = 0; src >= 0; --src, ++dst) {
+         outCodePoints[dst] = reversedWord[src];
+     }
+     return wordLength;
+ }
+
+ // Searches the trie from the root PtNode array for the PtNode at which the given word ends.
+ //
+ // inWord, length: code points of the word to look up.
+ // forceLowerCaseSearch: when true, every code point is converted with CharUtils::toLowerCase()
+ //     before it is matched.
+ // Returns the head position of the matching terminal PtNode, or NOT_A_DICT_POS when the word is
+ // empty, is not found, or ends at a PtNode that is not a terminal.
+ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+         const int length, const bool forceLowerCaseSearch) const {
+     if (length <= 0) {
+         // Nothing to search for; this also keeps the array below from being declared with a
+         // zero or negative size.
+         return NOT_A_DICT_POS;
+     }
+     int searchCodePoints[length];
+     for (int i = 0; i < length; ++i) {
+         searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
+     }
+     DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
+             getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+     readingHelper.initWithPtNodeArrayPos(getRootPosition());
+     const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+     while (!readingHelper.isEnd()) {
+         const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
+         if (readingHelper.getTotalCodePointCount() > length
+                 || !readingHelper.isMatchedCodePoint(0 /* index */,
+                         searchCodePoints[matchedCodePointCount])) {
+             // Current node has too many code points or its first code point is different from
+             // target code point. Skip this node and read the next sibling node.
+             readingHelper.readNextSiblingNode();
+             continue;
+         }
+         // Check following merged node code points.
+         const int nodeCodePointCount = nodeReader->getCodePointCount();
+         for (int j = 1; j < nodeCodePointCount; ++j) {
+             if (!readingHelper.isMatchedCodePoint(
+                     j, searchCodePoints[matchedCodePointCount + j])) {
+                 // Different code point is found. The given word is not included in the dictionary.
+                 return NOT_A_DICT_POS;
+             }
+         }
+         // All characters are matched.
+         if (length == readingHelper.getTotalCodePointCount()) {
+             // The word ends exactly at this PtNode, but that is a hit only when the PtNode is a
+             // terminal; otherwise the word is merely a prefix of longer words.
+             if (!nodeReader->isTerminal()) {
+                 return NOT_A_DICT_POS;
+             }
+             // Terminal position is found.
+             return nodeReader->getHeadPos();
+         }
+         if (!nodeReader->hasChildren()) {
+             return NOT_A_DICT_POS;
+         }
+         // Advance to the children nodes.
+         readingHelper.readChildNode();
+     }
+     // If we already traversed the tree further than the word is long, that means
+     // there was no match (or we would have found it).
+     return NOT_A_DICT_POS;
+ }
+
+ // Combines a unigram probability and a bigram probability into one value. Decaying dictionaries
+ // delegate to ForgettingCurveUtils::getProbability(). Other dictionaries return
+ // NOT_A_PROBABILITY without a unigram probability, back off from the unigram probability when
+ // there is no bigram probability, and otherwise compute the bigram-based probability.
+ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
+         const int bigramProbability) const {
+     if (mHeaderPolicy.isDecayingDict()) {
+         return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
+     }
+     if (unigramProbability == NOT_A_PROBABILITY) {
+         return NOT_A_PROBABILITY;
+     }
+     if (bigramProbability != NOT_A_PROBABILITY) {
+         return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+                 bigramProbability);
+     }
+     return ProbabilityUtils::backoff(unigramProbability);
+ }
+
+ // Returns the probability of the PtNode at ptNodePos evaluated without a bigram probability, or
+ // NOT_A_PROBABILITY when ptNodePos is NOT_A_DICT_POS or the PtNode is deleted, blacklisted or
+ // flagged as not a word.
+ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
+     if (ptNodePos != NOT_A_DICT_POS) {
+         DynamicPatriciaTrieNodeReader reader(&mBufferWithExtendableBuffer,
+                 getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+         reader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+         const bool isUsableWord =
+                 !reader.isDeleted() && !reader.isBlacklisted() && !reader.isNotAWord();
+         if (isUsableWord) {
+             return getProbability(reader.getProbability(), NOT_A_PROBABILITY);
+         }
+     }
+     return NOT_A_PROBABILITY;
+ }
+
+ // Returns the shortcut position recorded for the PtNode at ptNodePos, or NOT_A_DICT_POS when
+ // ptNodePos is NOT_A_DICT_POS or the PtNode is deleted.
+ int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
+     if (ptNodePos != NOT_A_DICT_POS) {
+         DynamicPatriciaTrieNodeReader reader(&mBufferWithExtendableBuffer,
+                 getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+         reader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+         if (!reader.isDeleted()) {
+             return reader.getShortcutPos();
+         }
+     }
+     return NOT_A_DICT_POS;
+ }
+
+ // Returns the bigrams position recorded for the PtNode at ptNodePos, or NOT_A_DICT_POS when
+ // ptNodePos is NOT_A_DICT_POS or the PtNode is deleted.
+ int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
+     if (ptNodePos != NOT_A_DICT_POS) {
+         DynamicPatriciaTrieNodeReader reader(&mBufferWithExtendableBuffer,
+                 getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+         reader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+         if (!reader.isDeleted()) {
+             return reader.getBigramsPos();
+         }
+     }
+     return NOT_A_DICT_POS;
+ }
+
+ // Adds a unigram through DynamicPatriciaTrieWritingHelper and bumps mUnigramCount when the helper
+ // reports that a new unigram was added. Returns false, after logging, when the buffer is not
+ // updatable or the dictionary has reached MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; otherwise
+ // returns whether the helper succeeded.
+ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
+         const int probability) {
+     if (!mBuffer->isUpdatable()) {
+         AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
+         return false;
+     }
+     const bool isTooLargeToUpdate = mBufferWithExtendableBuffer.getTailPosition()
+             >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+     if (isTooLargeToUpdate) {
+         AKLOGE("The dictionary is too large to dynamically update.");
+         return false;
+     }
+     DynamicPatriciaTrieReadingHelper trieReader(&mBufferWithExtendableBuffer,
+             getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+     trieReader.initWithPtNodeArrayPos(getRootPosition());
+     DynamicPatriciaTrieWritingHelper trieWriter(&mBufferWithExtendableBuffer,
+             &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+     bool isNewUnigram = false;
+     const bool succeeded = trieWriter.addUnigramWord(&trieReader, word, length, probability,
+             &isNewUnigram);
+     if (succeeded && isNewUnigram) {
+         ++mUnigramCount;
+     }
+     return succeeded;
+ }
+
+ // Adds a bigram from word0 to word1 through DynamicPatriciaTrieWritingHelper and bumps
+ // mBigramCount when the helper reports that a new bigram was added. Returns false when the buffer
+ // is not updatable, the dictionary has reached MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS, either
+ // word cannot be located (case-sensitive lookup), or the helper fails.
+ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
+         const int *const word1, const int length1, const int probability) {
+     if (!mBuffer->isUpdatable()) {
+         AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
+         return false;
+     }
+     const bool isTooLargeToUpdate = mBufferWithExtendableBuffer.getTailPosition()
+             >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+     if (isTooLargeToUpdate) {
+         AKLOGE("The dictionary is too large to dynamically update.");
+         return false;
+     }
+     const int firstWordPos = getTerminalNodePositionOfWord(word0, length0,
+             false /* forceLowerCaseSearch */);
+     if (firstWordPos == NOT_A_DICT_POS) {
+         return false;
+     }
+     const int secondWordPos = getTerminalNodePositionOfWord(word1, length1,
+             false /* forceLowerCaseSearch */);
+     if (secondWordPos == NOT_A_DICT_POS) {
+         return false;
+     }
+     DynamicPatriciaTrieWritingHelper trieWriter(&mBufferWithExtendableBuffer,
+             &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+     bool isNewBigram = false;
+     const bool succeeded = trieWriter.addBigramWords(firstWordPos, secondWordPos, probability,
+             &isNewBigram);
+     if (succeeded && isNewBigram) {
+         ++mBigramCount;
+     }
+     return succeeded;
+ }
+
+ // Removes the bigram from word0 to word1 through DynamicPatriciaTrieWritingHelper and decrements
+ // mBigramCount when the helper succeeds. Returns false when the buffer is not updatable, the
+ // dictionary has reached MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS, either word cannot be
+ // located (case-sensitive lookup), or the helper fails.
+ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
+         const int *const word1, const int length1) {
+     if (!mBuffer->isUpdatable()) {
+         AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
+         return false;
+     }
+     const bool isTooLargeToUpdate = mBufferWithExtendableBuffer.getTailPosition()
+             >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+     if (isTooLargeToUpdate) {
+         AKLOGE("The dictionary is too large to dynamically update.");
+         return false;
+     }
+     const int firstWordPos = getTerminalNodePositionOfWord(word0, length0,
+             false /* forceLowerCaseSearch */);
+     if (firstWordPos == NOT_A_DICT_POS) {
+         return false;
+     }
+     const int secondWordPos = getTerminalNodePositionOfWord(word1, length1,
+             false /* forceLowerCaseSearch */);
+     if (secondWordPos == NOT_A_DICT_POS) {
+         return false;
+     }
+     DynamicPatriciaTrieWritingHelper trieWriter(&mBufferWithExtendableBuffer,
+             &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+     if (!trieWriter.removeBigramWords(firstWordPos, secondWordPos)) {
+         return false;
+     }
+     --mBigramCount;
+     return true;
+ }
+
+ // Writes the dictionary to filePath via DynamicPatriciaTrieWritingHelper::writeToDictFile(),
+ // passing the header policy and the current unigram and bigram counts. Only logs a warning when
+ // the buffer is not updatable.
+ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
+     const bool isUpdatable = mBuffer->isUpdatable();
+     if (!isUpdatable) {
+         AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+         return;
+     }
+     DynamicPatriciaTrieWritingHelper dictWriter(&mBufferWithExtendableBuffer,
+             &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
+     dictWriter.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
+ }
+
+ // Writes the dictionary to filePath via writeToDictFileWithGC(), starting from the root position.
+ // Decay is requested only for decaying dictionaries, either because the testing flag is set or
+ // because ForgettingCurveUtils::needsToDecay() says so. The testing flag is cleared afterwards.
+ // Only logs a warning when the buffer is not updatable.
+ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
+     if (!mBuffer->isUpdatable()) {
+         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+         return;
+     }
+     bool needsToDecay = false;
+     if (mHeaderPolicy.isDecayingDict()) {
+         needsToDecay = mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+                 false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy);
+     }
+     // The GC pass uses its own bigram list policy constructed with the decay decision above.
+     DynamicBigramListPolicy gcBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer,
+             &mShortcutListPolicy, needsToDecay);
+     DynamicPatriciaTrieWritingHelper dictWriter(&mBufferWithExtendableBuffer,
+             &gcBigramListPolicy, &mShortcutListPolicy, needsToDecay);
+     dictWriter.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
+     mNeedsToDecayForTesting = false;
+ }
+
+ // Reports whether the dictionary should be garbage collected. Returns false, after logging, for a
+ // non-updatable buffer. Otherwise GC is needed when the additional buffer is near its size limit,
+ // when the total extended region exceeds MAX_DICT_EXTENDED_REGION_SIZE, when the dictionary has
+ // reached MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS while using additional buffer, or when a
+ // decaying dictionary needs to decay.
+ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
+     if (!mBuffer->isUpdatable()) {
+         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+         return false;
+     }
+     if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
+         // Additional buffer size is near the limit.
+         return true;
+     }
+     if (mHeaderPolicy.getExtendedRegionSize()
+             + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
+                     > MAX_DICT_EXTENDED_REGION_SIZE) {
+         // Total extended region size exceeds the limit.
+         return true;
+     }
+     if (mBufferWithExtendableBuffer.getTailPosition()
+             >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
+                     && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
+         // Needs to reduce dictionary size.
+         return true;
+     }
+     // Only decaying dictionaries can still require GC at this point.
+     return mHeaderPolicy.isDecayingDict()
+             && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+                     mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy));
+ }
+
+ // Answers a property query identified by a NUL-terminated query string.
+ //
+ // query: one of the *_QUERY constants of this class; any other string is ignored.
+ // outResult, maxResultLength: buffer that receives the decimal result and its size, as passed to
+ //     snprintf(). It is left untouched for SET_NEEDS_TO_DECAY_FOR_TESTING and unknown queries.
+ //
+ // The query is compared with strcmp() on purpose: maxResultLength is the size of the output
+ // buffer and says nothing about the query, so using it as a comparison bound would let a short
+ // result buffer turn the comparison into a prefix match (e.g. "MAX_BIGRAM_COUNT" matching
+ // "MAX_UNIGRAM_COUNT" on "MAX_").
+ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
+         const int maxResultLength) {
+     if (strcmp(query, UNIGRAM_COUNT_QUERY) == 0) {
+         snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+     } else if (strcmp(query, BIGRAM_COUNT_QUERY) == 0) {
+         snprintf(outResult, maxResultLength, "%d", mBigramCount);
+     } else if (strcmp(query, MAX_UNIGRAM_COUNT_QUERY) == 0) {
+         snprintf(outResult, maxResultLength, "%d",
+                 mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
+                         static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
+     } else if (strcmp(query, MAX_BIGRAM_COUNT_QUERY) == 0) {
+         snprintf(outResult, maxResultLength, "%d",
+                 mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
+                         static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
+     } else if (strcmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY) == 0) {
+         // Test hook: makes the next needsToRunGC()/flushWithGC() treat the dictionary as
+         // needing to decay.
+         mNeedsToDecayForTesting = true;
+     }
+ }
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
new file mode 100644
index 000000000..be97ee1a5
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+
+namespace latinime {
+
+class DicNode;
+class DicNodeVector;
+
+ // Dictionary structure policy for a dynamic patricia trie dictionary stored in an MmappedBuffer.
+ // The policy takes ownership of the buffer passed to its constructor and deletes it when the
+ // policy is destroyed.
+ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
+  public:
+     // The header is read from the start of the buffer; the rest of the buffer, after the header,
+     // is wrapped in mBufferWithExtendableBuffer. Unigram and bigram counts start from the values
+     // stored in the header.
+     DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
+             : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
+               mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
+                       mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+               mShortcutListPolicy(&mBufferWithExtendableBuffer),
+               mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
+                       mHeaderPolicy.isDecayingDict()),
+               mUnigramCount(mHeaderPolicy.getUnigramCount()),
+               mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
+
+     ~DynamicPatriciaTriePolicy() {
+         delete mBuffer;
+     }
+
+     // The root PtNode array is at position 0 of the buffer that follows the header.
+     AK_FORCE_INLINE int getRootPosition() const {
+         return 0;
+     }
+
+     void createAndGetAllChildNodes(const DicNode *const dicNode,
+             DicNodeVector *const childDicNodes) const;
+
+     int getCodePointsAndProbabilityAndReturnCodePointCount(
+             const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
+             int *const outUnigramProbability) const;
+
+     int getTerminalNodePositionOfWord(const int *const inWord,
+             const int length, const bool forceLowerCaseSearch) const;
+
+     int getProbability(const int unigramProbability, const int bigramProbability) const;
+
+     int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
+
+     int getShortcutPositionOfPtNode(const int ptNodePos) const;
+
+     int getBigramsPositionOfPtNode(const int ptNodePos) const;
+
+     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
+         return &mHeaderPolicy;
+     }
+
+     const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
+         return &mBigramListPolicy;
+     }
+
+     const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
+         return &mShortcutListPolicy;
+     }
+
+     bool addUnigramWord(const int *const word, const int length, const int probability);
+
+     bool addBigramWords(const int *const word0, const int length0, const int *const word1,
+             const int length1, const int probability);
+
+     bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
+             const int length1);
+
+     void flush(const char *const filePath);
+
+     void flushWithGC(const char *const filePath);
+
+     bool needsToRunGC(const bool mindsBlockByGC) const;
+
+     void getProperty(const char *const query, char *const outResult,
+             const int maxResultLength);
+
+  private:
+     DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
+
+     // Query strings understood by getProperty().
+     static const char *const UNIGRAM_COUNT_QUERY;
+     static const char *const BIGRAM_COUNT_QUERY;
+     static const char *const MAX_UNIGRAM_COUNT_QUERY;
+     static const char *const MAX_BIGRAM_COUNT_QUERY;
+     static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
+     static const int MAX_DICT_EXTENDED_REGION_SIZE;
+     static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+
+     // Owned; deleted in the destructor. Must stay the first member because the members below are
+     // initialized from it.
+     const MmappedBuffer *const mBuffer;
+     const HeaderPolicy mHeaderPolicy;
+     BufferWithExtendableBuffer mBufferWithExtendableBuffer;
+     DynamicShortcutListPolicy mShortcutListPolicy;
+     DynamicBigramListPolicy mBigramListPolicy;
+     int mUnigramCount;
+     int mBigramCount;
+     // Boolean flag: only ever set to true or false and used as a logical operand.
+     bool mNeedsToDecayForTesting;
+ };
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index b918e0765..f108c219f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -14,25 +14,22 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
-#include "utils/char_utils.h"
namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
-const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
+const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
// Visits all PtNodes in post-order depth first manner.
// For example, visits c -> b -> y -> x -> a for the following dictionary:
// a _ b _ c
// \ x _ y
-bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
+bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
TraversingEventListener *const listener) {
bool alreadyVisitedChildren = false;
// Descend from the root to the root PtNode array.
@@ -40,26 +37,22 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
return false;
}
while (!isEnd()) {
- const PtNodeParams ptNodeParams(getPtNodeParams());
- if (!ptNodeParams.isValid()) {
- break;
- }
if (!alreadyVisitedChildren) {
- if (ptNodeParams.hasChildren()) {
+ if (mNodeReader.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
+ if (!listener->onDescend(mNodeReader.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode(ptNodeParams);
+ readChildNode();
} else {
alreadyVisitedChildren = true;
}
} else {
- if (!listener->onVisitingPtNode(&ptNodeParams)) {
+ if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
return false;
}
- readNextSiblingNode(ptNodeParams);
+ readNextSiblingNode();
if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent.
@@ -92,7 +85,7 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
// For example, visits a -> b -> x -> c -> y for the following dictionary:
// a _ b _ c
// \ x _ y
-bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener) {
bool alreadyVisitedAllPtNodesInArray = false;
bool alreadyVisitedChildren = false;
@@ -108,14 +101,10 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi
}
pushReadingStateToStack();
while (!isEnd()) {
- const PtNodeParams ptNodeParams(getPtNodeParams());
- if (!ptNodeParams.isValid()) {
- break;
- }
if (alreadyVisitedAllPtNodesInArray) {
if (alreadyVisitedChildren) {
// Move to next sibling PtNode's children.
- readNextSiblingNode(ptNodeParams);
+ readNextSiblingNode();
if (isEnd()) {
// Return to the parent PTNode.
if (!listener->onAscend()) {
@@ -131,13 +120,13 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi
alreadyVisitedChildren = false;
}
} else {
- if (ptNodeParams.hasChildren()) {
+ if (mNodeReader.hasChildren()) {
// Move to the first child.
- if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
+ if (!listener->onDescend(mNodeReader.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
- readChildNode(ptNodeParams);
+ readChildNode();
// Push state to return the head of PtNode array.
pushReadingStateToStack();
alreadyVisitedAllPtNodesInArray = false;
@@ -147,10 +136,10 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi
}
}
} else {
- if (!listener->onVisitingPtNode(&ptNodeParams)) {
+ if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
return false;
}
- readNextSiblingNode(ptNodeParams);
+ readNextSiblingNode();
if (isEnd()) {
if (!listener->onReadingPtNodeArrayTail()) {
return false;
@@ -169,95 +158,9 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi
return !isError();
}
-int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
- // This method traverses parent nodes from the terminal by following parent pointers; thus,
- // node code points are stored in the buffer in the reverse order.
- int reverseCodePoints[maxCodePointCount];
- const PtNodeParams terminalPtNodeParams(getPtNodeParams());
- // First, read the terminal node and get its probability.
- if (!isValidTerminalNode(terminalPtNodeParams)) {
- // Node at the ptNodePos is not a valid terminal node.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store terminal node probability.
- *outUnigramProbability = terminalPtNodeParams.getProbability();
- // Then, following parent node link to the dictionary root and fetch node code points.
- int totalCodePointCount = 0;
- while (!isEnd()) {
- const PtNodeParams ptNodeParams(getPtNodeParams());
- totalCodePointCount = getTotalCodePointCount(ptNodeParams);
- if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
- // The ptNodePos is not a valid terminal node position in the dictionary.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Store node code points to buffer in the reverse order.
- fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
- reverseCodePoints);
- // Follow parent node toward the root node.
- readParentNode(ptNodeParams);
- }
- if (isError()) {
- // The node position or the dictionary is invalid.
- *outUnigramProbability = NOT_A_PROBABILITY;
- return 0;
- }
- // Reverse the stored code points to output them.
- for (int i = 0; i < totalCodePointCount; ++i) {
- outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
- }
- return totalCodePointCount;
-}
-
-int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) {
- int searchCodePoints[length];
- for (int i = 0; i < length; ++i) {
- searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
- }
- while (!isEnd()) {
- const PtNodeParams ptNodeParams(getPtNodeParams());
- const int matchedCodePointCount = getPrevTotalCodePointCount();
- if (getTotalCodePointCount(ptNodeParams) > length
- || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
- searchCodePoints[matchedCodePointCount])) {
- // Current node has too many code points or its first code point is different from
- // target code point. Skip this node and read the next sibling node.
- readNextSiblingNode(ptNodeParams);
- continue;
- }
- // Check following merged node code points.
- const int nodeCodePointCount = ptNodeParams.getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
- // Different code point is found. The given word is not included in the dictionary.
- return NOT_A_DICT_POS;
- }
- }
- // All characters are matched.
- if (length == getTotalCodePointCount(ptNodeParams)) {
- if (!ptNodeParams.isTerminal()) {
- return NOT_A_DICT_POS;
- }
- // Terminal position is found.
- return ptNodeParams.getHeadPos();
- }
- if (!ptNodeParams.hasChildren()) {
- return NOT_A_DICT_POS;
- }
- // Advance to the children nodes.
- readChildNode(ptNodeParams);
- }
- // If we already traversed the tree further than the word is long, there means
- // there was no match (or we would have found it).
- return NOT_A_DICT_POS;
-}
-
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
-void DynamicPtReadingHelper::nextPtNodeArray() {
+void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
// Reading invalid position because of a bug or a broken dictionary.
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
@@ -267,48 +170,42 @@ void DynamicPtReadingHelper::nextPtNodeArray() {
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
+ mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
- mReadingState.mRemainingPtNodeCountInThisArray =
- PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
- &mReadingState.mPos);
+ mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ dictBuf, &mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
// Count up nodes and node arrays to avoid infinite loop.
- mReadingState.mTotalPtNodeIndexInThisArrayChain +=
- mReadingState.mRemainingPtNodeCountInThisArray;
- mReadingState.mPtNodeArrayIndexInThisArrayChain++;
- if (mReadingState.mRemainingPtNodeCountInThisArray < 0
- || mReadingState.mTotalPtNodeIndexInThisArrayChain
- > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
- || mReadingState.mPtNodeArrayIndexInThisArrayChain
- > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+ mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
+ mReadingState.mNodeArrayCount++;
+ if (mReadingState.mNodeCount < 0
+ || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+ || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
- mReadingState.mRemainingPtNodeCountInThisArray,
- mReadingState.mTotalPtNodeIndexInThisArrayChain,
- MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
- mReadingState.mPtNodeArrayIndexInThisArrayChain,
- MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+ mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
+ MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
+ MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false);
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
- if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
+ if (mReadingState.mNodeCount == 0) {
// Empty node array. Try following forward link.
followForwardLink();
}
}
// Follow the forward link and read the next node array if exists.
-void DynamicPtReadingHelper::followForwardLink() {
+void DynamicPatriciaTrieReadingHelper::followForwardLink() {
if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
// Reading invalid position because of bug or broken dictionary.
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
@@ -324,12 +221,12 @@ void DynamicPtReadingHelper::followForwardLink() {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
const int forwardLinkPosition =
- DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
+ DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
- if (DynamicPtReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
+ if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link.
mReadingState.mPos += forwardLinkPosition;
nextPtNodeArray();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index a69490943..a71c06971 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -14,15 +14,16 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H
-#define LATINIME_DYNAMIC_PT_READING_HELPER_H
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
#include <cstddef>
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime {
@@ -34,7 +35,7 @@ class DictionaryShortcutsStructurePolicy;
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
*/
-class DynamicPtReadingHelper {
+class DynamicPatriciaTrieReadingHelper {
public:
class TraversingEventListener {
public:
@@ -50,7 +51,8 @@ class DynamicPtReadingHelper {
virtual bool onReadingPtNodeArrayTail() = 0;
// Returns whether the event handling was succeeded or not.
- virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
+ virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) = 0;
protected:
TraversingEventListener() {};
@@ -59,12 +61,13 @@ class DynamicPtReadingHelper {
DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
};
- DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer,
- const PtNodeReader *const ptNodeReader)
+ DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
+ const DictionaryBigramsStructurePolicy *const bigramsPolicy,
+ const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mIsError(false), mReadingState(), mBuffer(buffer),
- mPtNodeReader(ptNodeReader), mReadingStateStack() {}
+ mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
- ~DynamicPtReadingHelper() {}
+ ~DynamicPatriciaTrieReadingHelper() {}
AK_FORCE_INLINE bool isError() const {
return mIsError;
@@ -81,12 +84,15 @@ class DynamicPtReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodeArrayPos;
- mReadingState.mTotalCodePointCountSinceInitialization = 0;
- mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
- mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
+ mReadingState.mPrevTotalCodePointCount = 0;
+ mReadingState.mTotalNodeCount = 0;
+ mReadingState.mNodeArrayCount = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear();
nextPtNodeArray();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
}
}
@@ -97,88 +103,94 @@ class DynamicPtReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodePos;
- mReadingState.mRemainingPtNodeCountInThisArray = 1;
- mReadingState.mTotalCodePointCountSinceInitialization = 0;
- mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
- mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+ mReadingState.mNodeCount = 1;
+ mReadingState.mPrevTotalCodePointCount = 0;
+ mReadingState.mTotalNodeCount = 1;
+ mReadingState.mNodeArrayCount = 1;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
+ fetchPtNodeInfo();
}
}
- AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
- if (isEnd()) {
- return PtNodeParams();
- }
- return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos);
+ AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
+ return &mNodeReader;
}
- AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
- return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
+ AK_FORCE_INLINE bool isValidTerminalNode() const {
+ return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
}
- AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
- const int codePoint) const {
- return ptNodeParams.getCodePoints()[index] == codePoint;
+ AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
+ return mMergedNodeCodePoints[index] == codePoint;
}
// Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
- return mReadingState.mTotalCodePointCountSinceInitialization;
+ return mReadingState.mPrevTotalCodePointCount;
}
// Return code point count include the last read node's code points.
- AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
- return mReadingState.mTotalCodePointCountSinceInitialization
- + ptNodeParams.getCodePointCount();
+ AK_FORCE_INLINE int getTotalCodePointCount() const {
+ return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
}
- AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
+ AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
const int index, int *const outCodePoints) const {
- const int nodeCodePointCount = ptNodeParams.getCodePointCount();
- const int *const nodeCodePoints = ptNodeParams.getCodePoints();
+ const int nodeCodePointCount = mNodeReader.getCodePointCount();
for (int i = 0; i < nodeCodePointCount; ++i) {
- outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
+ outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
}
}
- AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
- mReadingState.mRemainingPtNodeCountInThisArray -= 1;
- mReadingState.mPos = ptNodeParams.getSiblingNodePos();
- if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
+ AK_FORCE_INLINE const int *getMergedNodeCodePoints() const { // Code points of the last fetched PtNode.
+ return mMergedNodeCodePoints;
+ }
+
+ AK_FORCE_INLINE void readNextSiblingNode() {
+ mReadingState.mNodeCount -= 1;
+ mReadingState.mPos = mNodeReader.getSiblingNodePos();
+ if (mReadingState.mNodeCount <= 0) {
// All nodes in the current node array have been read.
followForwardLink();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
+ } else {
+ fetchPtNodeInfo();
}
}
// Read the first child node of the current node.
- AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
- if (ptNodeParams.hasChildren()) {
- mReadingState.mTotalCodePointCountSinceInitialization +=
- ptNodeParams.getCodePointCount();
- mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
- mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
- mReadingState.mPos = ptNodeParams.getChildrenPos();
+ AK_FORCE_INLINE void readChildNode() {
+ if (mNodeReader.hasChildren()) {
+ mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mReadingState.mTotalNodeCount = 0;
+ mReadingState.mNodeArrayCount = 0;
+ mReadingState.mPos = mNodeReader.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
nextPtNodeArray();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
}
// Read the parent node of the current node.
- AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
- if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
- mReadingState.mTotalCodePointCountSinceInitialization +=
- ptNodeParams.getCodePointCount();
- mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
- mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
- mReadingState.mRemainingPtNodeCountInThisArray = 1;
- mReadingState.mPos = ptNodeParams.getParentPos();
+ AK_FORCE_INLINE void readParentNode() {
+ if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
+ mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
+ mReadingState.mTotalNodeCount = 1;
+ mReadingState.mNodeArrayCount = 1;
+ mReadingState.mNodeCount = 1;
+ mReadingState.mPos = mNodeReader.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
- mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
+ mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+ fetchPtNodeInfo();
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
@@ -189,7 +201,13 @@ class DynamicPtReadingHelper {
}
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
- return mReadingState.mPosOfThisPtNodeArrayHead;
+ return mReadingState.mPosOfLastPtNodeArrayHead;
+ }
+
+ AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
}
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
@@ -197,54 +215,53 @@ class DynamicPtReadingHelper {
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener);
- int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
- int *const outCodePoints, int *const outUnigramProbability);
-
- int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
- const bool forceLowerCaseSearch);
-
private:
- DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper);
+ DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
- // This class encapsulates the reading state of a position in the dictionary. It points at a
- // specific PtNode in the dictionary.
- class PtNodeReadingState {
+ class ReadingState {
public:
// Note that copy constructor and assignment operator are used for this class to use
// std::vector.
- PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
- mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
- mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
- mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
+ ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
+ mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+ mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
int mPos;
- // Remaining node count in the current array.
- int mRemainingPtNodeCountInThisArray;
- int mTotalCodePointCountSinceInitialization;
- // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
- int mTotalPtNodeIndexInThisArrayChain;
- // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
- // PtNode arrays.
- int mPtNodeArrayIndexInThisArrayChain;
+ // Node count of a node array.
+ int mNodeCount;
+ int mPrevTotalCodePointCount;
+ int mTotalNodeCount;
+ int mNodeArrayCount;
int mPosOfLastForwardLinkField;
- int mPosOfThisPtNodeArrayHead;
+ int mPosOfLastPtNodeArrayHead;
};
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
- static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+ static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
// TODO: Introduce error code to track what caused the error.
bool mIsError;
- PtNodeReadingState mReadingState;
+ ReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
- const PtNodeReader *const mPtNodeReader;
- std::vector<PtNodeReadingState> mReadingStateStack;
+ DynamicPatriciaTrieNodeReader mNodeReader;
+ int mMergedNodeCodePoints[MAX_WORD_LENGTH];
+ std::vector<ReadingState> mReadingStateStack;
void nextPtNodeArray();
void followForwardLink();
+ AK_FORCE_INLINE void fetchPtNodeInfo() { // Loads the PtNode at mReadingState.mPos into mNodeReader.
+ mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
+ MAX_WORD_LENGTH, mMergedNodeCodePoints);
+ if (mNodeReader.getCodePointCount() <= 0) {
+ // A PtNode without code points is not allowed: flag the error and reset the position.
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ }
+ }
+
AK_FORCE_INLINE void pushReadingStateToStack() {
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
@@ -262,8 +279,11 @@ class DynamicPtReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
+ if (!isEnd()) {
+ fetchPtNodeInfo();
+ }
}
}
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 3586b50ab..d68446db6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -14,38 +14,38 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
-const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::MASK_MOVED = 0xC0;
-const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
-const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_MOVED = 0x40;
-const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_DELETED = 0x80;
-const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_WILL_BECOME_NON_TERMINAL = 0x00;
+typedef DynamicPatriciaTrieReadingUtils DptReadingUtils;
+
+const DptReadingUtils::NodeFlags DptReadingUtils::MASK_MOVED = 0xC0;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
+const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
-const int DynamicPtReadingUtils::DICT_OFFSET_INVALID = 0;
-const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
-/* static */ int DynamicPtReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
+/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
const int pos) {
int linkAddressPos = pos;
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
}
-/* static */ int DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
-/* static */ int DynamicPtReadingUtils::getParentPtNodePos(const int parentOffset,
- const int ptNodePos) {
+/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
if (parentOffset == DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
} else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
@@ -55,7 +55,7 @@ const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
}
}
-/* static */ int DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
+/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const int base = *pos;
const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index 89ae12c0b..67c3cc57e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PT_READING_UTILS_H
-#define LATINIME_DYNAMIC_PT_READING_UTILS_H
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H
#include <stdint.h>
@@ -23,7 +23,7 @@
namespace latinime {
-class DynamicPtReadingUtils {
+class DynamicPatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
@@ -54,30 +54,22 @@ class DynamicPtReadingUtils {
return FLAG_IS_DELETED == (MASK_MOVED & flags);
}
- static AK_FORCE_INLINE bool willBecomeNonTerminal(const NodeFlags flags) {
- return FLAG_WILL_BECOME_NON_TERMINAL == (MASK_MOVED & flags);
- }
-
static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags,
- const bool isMoved, const bool isDeleted, const bool willBecomeNonTerminal) {
+ const bool isMoved, const bool isDeleted) {
NodeFlags flags = originalFlags;
- flags = willBecomeNonTerminal ?
- ((flags & (~MASK_MOVED)) | FLAG_WILL_BECOME_NON_TERMINAL) : flags;
flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags;
flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags;
- flags = (!isMoved && !isDeleted && !willBecomeNonTerminal) ?
- ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
+ flags = (!isMoved && !isDeleted) ? ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
return flags;
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtReadingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils);
static const NodeFlags MASK_MOVED;
static const NodeFlags FLAG_IS_NOT_MOVED;
static const NodeFlags FLAG_IS_MOVED;
static const NodeFlags FLAG_IS_DELETED;
- static const NodeFlags FLAG_WILL_BECOME_NON_TERMINAL;
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PT_READING_UTILS_H */
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
new file mode 100644
index 000000000..052558bfc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -0,0 +1,558 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
+
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; // presumably bytes - TODO confirm
+// TODO: Make MAX_DICTIONARY_SIZE 8MB (the value below is 2 * 1024 * 1024).
+const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
+
+bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
+ DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ bool *const outAddedNewUnigram) { // Walks PtNodes via readingHelper to insert or update the word.
+ int parentPos = NOT_A_DICT_POS; // Head position of the last PtNode whose children were entered.
+ while (!readingHelper->isEnd()) {
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(0 /* index */,
+ wordCodePoints[matchedCodePointCount])) { // NOTE(review): no bounds check vs codePointCount.
+ // The first code point of this PtNode differs from the word's next code point. Skip this
+ // PtNode and read the next sibling node.
+ readingHelper->readNextSiblingNode();
+ continue;
+ }
+ // Compare the remaining code points of this PtNode against the word.
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
+ const int nodeCodePointCount = nodeReader->getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
+ wordCodePoints[matchedCodePointCount + j])) {
+ *outAddedNewUnigram = true; // The word ends or diverges inside this PtNode at index j.
+ return reallocatePtNodeAndAddNewPtNodes(nodeReader,
+ readingHelper->getMergedNodeCodePoints(), j,
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
+ probability),
+ wordCodePoints + matchedCodePointCount,
+ codePointCount - matchedCodePointCount);
+ }
+ }
+ // Every code point of this PtNode matched the word.
+ if (codePointCount == readingHelper->getTotalCodePointCount()) { // Word ends at this PtNode.
+ return setPtNodeProbability(nodeReader, probability,
+ readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
+ }
+ if (!nodeReader->hasChildren()) { // The unmatched tail of the word is handed to a new child.
+ *outAddedNewUnigram = true;
+ return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
+ wordCodePoints + readingHelper->getTotalCodePointCount(),
+ codePointCount - readingHelper->getTotalCodePointCount());
+ }
+ // Remember this PtNode as the parent and advance to its children.
+ parentPos = nodeReader->getHeadPos();
+ readingHelper->readChildNode();
+ }
+ if (readingHelper->isError()) {
+ // The reading helper reported an error; the dictionary is treated as invalid.
+ return false;
+ }
+ int pos = readingHelper->getPosOfLastForwardLinkField(); // Reading ended without a match.
+ *outAddedNewUnigram = true;
+ return createAndInsertNodeIntoPtNodeArray(parentPos,
+ wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
+ codePointCount - readingHelper->getPrevTotalCodePointCount(),
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
+ const int probability, bool *const outAddedNewBigram) { // Adds a bigram from word0Pos to word1Pos.
+ int mMergedNodeCodePoints[MAX_WORD_LENGTH]; // Local buffer, despite the member-style "m" prefix.
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
+ mMergedNodeCodePoints);
+ // Move the word0 PtNode to the tail of the buffer so that a bigram entry can be added to it.
+ const int newNodePos = mBuffer->getTailPosition();
+ if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
+ return false;
+ }
+ int writingPos = newNodePos;
+ // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
+ mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
+ &writingPos)) {
+ return false;
+ }
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); // Re-read the PtNode from its new position.
+ if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
+ // Insert a new bigram entry into the existing bigram list.
+ int bigramListPos = nodeReader.getBigramsPos();
+ return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
+ outAddedNewBigram);
+ } else {
+ // The PtNode doesn't have a bigram list.
+ *outAddedNewBigram = true;
+ // First, write a bigram entry at writingPos, which follows the copied PtNode.
+ if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
+ return false;
+ }
+ // Then, rebuild the flags from the node's properties with hasBigrams set to true.
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
+ nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
+ nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
+ nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
+ writingPos = newNodePos; // Rewind to the head of the moved PtNode.
+ // Write updated flags into the moved PtNode's flags field.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+ }
+}
+
+// Remove a bigram relation from word0Pos to word1Pos. Returns false if word0 has no bigram list.
+bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
+ if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
+ return false;
+ }
+ return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); // Result comes from the policy.
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
+ const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + // Header's value plus
+ mBuffer->getUsedAdditionalBufferSize(); // the additional buffer size used by mBuffer.
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
+ false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
+ return; // The header could not be written, so nothing is flushed to the file.
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
+}
+
+void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+ const char *const fileName, const HeaderPolicy *const headerPolicy) {
+ BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
+ MAX_DICTIONARY_SIZE); // Destination buffer that runGC() fills.
+ int unigramCount = 0;
+ int bigramCount = 0;
+ if (mNeedsToDecay) {
+ ForgettingCurveUtils::sTimeKeeper.setCurrentTime(); // Done before GC when decaying is needed.
+ }
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
+ return; // GC failed, so no file is written.
+ }
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
+ mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
+ return; // The header could not be written, so no file is written.
+ }
+ DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
+}
+
+bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
+ const DynamicPatriciaTrieNodeReader *const nodeToUpdate) { // Rewrites the node's flags as deleted.
+ int pos = nodeToUpdate->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize(); // Make pos relative to the additional buffer.
+ }
+ // Read the original flags from the selected buffer.
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
+ true /* isDeleted */);
+ int writingPos = nodeToUpdate->getHeadPos();
+ // Overwrite the flags at the head position of the PtNode; returns the write result.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
+ const int bigramLinkedNodePos) {
+ int pos = originalNode->getHeadPos();
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
+ const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ pos -= mBuffer->getOriginalBufferSize();
+ }
+ // Read original flags
+ const PatriciaTrieReadingUtils::NodeFlags originalFlags =
+ PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
+ false /* isDeleted */);
+ int writingPos = originalNode->getHeadPos();
+ // Update flags.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ // Update moved position, which is stored in the parent offset field.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
+ return false;
+ }
+ // Update bigram linked node position, which is stored in the children position field.
+ int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
+ return false;
+ }
+ if (originalNode->hasChildren()) {
+ // Update children's parent position.
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
+ while (!readingHelper.isEnd()) {
+ int parentOffsetFieldPos = nodeReader->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
+ &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up updating the dictionary.
+ return false;
+ }
+ readingHelper.readNextSiblingNode();
+ }
+ }
+ return true;
+}
+
+// Write new PtNode at writingPos.
+bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
+ BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
+ const bool isNotAWord, const int parentPos, const int *const codePoints,
+ const int codePointCount, const int probability, const int childrenPos,
+ const int originalBigramListPos, const int originalShortcutListPos,
+ int *const writingPos) {
+ const int nodePos = *writingPos;
+ // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+ // PtNode writing.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
+ 0 /* nodeFlags */, writingPos)) {
+ return false;
+ }
+ // Calculate a parent offset and write the offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
+ parentPos, nodePos, writingPos)) {
+ return false;
+ }
+ // Write code points
+ if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
+ codePoints, codePointCount, writingPos)) {
+ return false;
+ }
+ // Write probability when the probability is a valid probability, which means this node is
+ // terminal.
+ if (probability != NOT_A_PROBABILITY) {
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
+ probability, writingPos)) {
+ return false;
+ }
+ }
+ // Write children position
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
+ childrenPos, writingPos)) {
+ return false;
+ }
+ // Copy shortcut list when the originalShortcutListPos is a valid dictionary position.
+ if (originalShortcutListPos != NOT_A_DICT_POS) {
+ int fromPos = originalShortcutListPos;
+ if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
+ writingPos)) {
+ return false;
+ }
+ }
+ // Copy bigram list when the originalBigramListPos is a valid dictionary position.
+ int bigramCount = 0;
+ if (originalBigramListPos != NOT_A_DICT_POS) {
+ int fromPos = originalBigramListPos;
+ if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
+ return false;
+ }
+ }
+ // Create node flags and write them.
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
+ probability != NOT_A_PROBABILITY /* isTerminal */,
+ originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
+ bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ int flagsFieldPos = nodePos;
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
+ &flagsFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
+ BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos) {
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
+ false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
+ NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
+ NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
+ BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos) {
+ return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
+ originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
+ originalNode->getChildrenPos(), originalNode->getBigramsPos(),
+ originalNode->getShortcutPos(), writingPos);
+}
+
+bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
+ const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
+ int *const forwardLinkFieldPos) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ newPtNodeArrayPos, forwardLinkFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
+ probability);
+}
+
+bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
+ const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
+ const int *const codePoints, bool *const outAddedNewUnigram) {
+ if (originalPtNode->isTerminal()) {
+ // Overwrites the probability.
+ *outAddedNewUnigram = false;
+ const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
+ probability);
+ int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
+ probabilityToWrite, &probabilityFieldPos)) {
+ return false;
+ }
+ } else {
+ // Make the node terminal and write the probability.
+ *outAddedNewUnigram = true;
+ int movedPos = mBuffer->getTailPosition();
+ if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
+ return false;
+ }
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
+ originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
+ &movedPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+ const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
+ const int *const codePoints, const int codePointCount) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ newPtNodeArrayPos, &childrenPosFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
+ codePointCount, probability);
+}
+
+bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
+ const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
+ const int probability) {
+ int writingPos = mBuffer->getTailPosition();
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ 1 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
+ probability, &writingPos)) {
+ return false;
+ }
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Returns whether updating the dictionary succeeded.
+bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
+ const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
+ const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add new child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. The children position will be updated later
+ // with actual children position.
+ const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
+ if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
+ reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
+ &writingPos)) {
+ return false;
+ }
+ const int actualChildrenPos = writingPos;
+ // Create new children PtNode array.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
+ newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
+ firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeCodePoints + overlappingCodePointCount,
+ reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNode->getProbability(), &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
+ newNodeCodePoints + overlappingCodePointCount,
+ newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
+ &writingPos)) {
+ return false;
+ }
+ }
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Update original reallocatingPtNode as moved.
+ if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Load node info. Information of the 1st part will be fetched.
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
+ nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
+ // Update children position.
+ int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
+ actualChildrenPos, &childrenPosFieldPos)) {
+ return false;
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ headerPolicy, this, mBuffer, mNeedsToDecay);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more unigrams.
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(mBigramPolicy);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
+ > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more bigrams.
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instance for the GCed dictionary.
+ DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
+ DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
+ mNeedsToDecay);
+ // Create reading helper for the GCed dictionary.
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
+ &newDictShortcutPolicy);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
+ return true;
+}
+
+int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
+ const int newProbability) {
+ if (mNeedsToDecay) {
+ return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+ newProbability);
+ } else {
+ return newProbability;
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
new file mode 100644
index 000000000..ca8664729
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "utils/hash_map_compat.h"
+
+namespace latinime {
+
+class BufferWithExtendableBuffer;
+class DynamicBigramListPolicy;
+class DynamicPatriciaTrieNodeReader;
+class DynamicPatriciaTrieReadingHelper;
+class DynamicShortcutListPolicy;
+class HeaderPolicy;
+
+class DynamicPatriciaTrieWritingHelper {
+ public:
+ typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
+ typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap {
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
+ static const size_t MAX_DICTIONARY_SIZE;
+
+ DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
+ DynamicBigramListPolicy *const bigramPolicy,
+ DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
+ mNeedsToDecay(needsToDecay) {}
+
+ ~DynamicPatriciaTrieWritingHelper() {}
+
+ // Add a word to the dictionary. If the word already exists, update the probability.
+ bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ bool *const outAddedNewUnigram);
+
+ // Add a bigram relation from word0Pos to word1Pos.
+ bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
+ bool *const outAddedNewBigram);
+
+ // Remove a bigram relation from word0Pos to word1Pos.
+ bool removeBigramWords(const int word0Pos, const int word1Pos);
+
+ void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
+ const int unigramCount, const int bigramCount);
+
+ void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
+ const HeaderPolicy *const headerPolicy);
+
+ // CAVEAT: This method must be called only from inner classes of
+ // DynamicPatriciaTrieGcEventListeners.
+ bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
+
+ // CAVEAT: This method must be called only from this class or inner classes of
+ // DynamicPatriciaTrieGcEventListeners.
+ bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
+ const int *const codePoints, const int codePointCount, const int probability,
+ int *const writingPos);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
+
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
+ BufferWithExtendableBuffer *const mBuffer;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ DynamicShortcutListPolicy *const mShortcutPolicy;
+ const bool mNeedsToDecay;
+
+ bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
+ const int movedPos, const int bigramLinkedNodePos);
+
+ bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const bool isBlacklisted, const bool isNotAWord,
+ const int parentPos, const int *const codePoints, const int codePointCount,
+ const int probability, const int childrenPos, const int originalBigramListPos,
+ const int originalShortcutListPos, int *const writingPos);
+
+ bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
+ const int parentPos, const int *const codePoints, const int codePointCount,
+ const int probability, int *const writingPos);
+
+ bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
+
+ bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
+ const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
+
+ bool createChildrenPtNodeArrayAndAChildPtNode(
+ const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
+ const int *const codePoints, const int codePointCount);
+
+ bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
+ const int nodeCodePointCount, const int probability);
+
+ bool reallocatePtNodeAndAddNewPtNodes(
+ const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
+ const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
+ const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
+ const int newNodeCodePointCount);
+
+ bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
+ int *const outBigramCount);
+
+ int getUpdatedProbability(const int originalProbability, const int newProbability);
+};
+} // namespace latinime
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
index ebbdc2ea2..30ff10cd6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
#include <cstddef>
#include <cstdlib>
@@ -24,18 +24,19 @@
namespace latinime {
-const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
-const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
-const int DynamicPtWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
-const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
-const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
-const int DynamicPtWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
-const int DynamicPtWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
-const int DynamicPtWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
-const int DynamicPtWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
-const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
+const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F;
+const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF;
+const int DynamicPatriciaTrieWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1;
+const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2;
+const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000;
+const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
+const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
+const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
+const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
+const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
+const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
-/* static */ bool DynamicPtWritingUtils::writeEmptyDictionary(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
BufferWithExtendableBuffer *const buffer, const int rootPos) {
int writingPos = rootPos;
if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) {
@@ -45,13 +46,13 @@ const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
&writingPos);
}
-/* static */ bool DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos) {
return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
}
-/* static */ bool DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const size_t arraySize,
int *const arraySizeFieldPos) {
// Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
@@ -73,20 +74,20 @@ const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
}
}
-/* static */ bool DynamicPtWritingUtils::writeFlagsAndAdvancePosition(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(
BufferWithExtendableBuffer *const buffer,
- const DynamicPtReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
+ const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) {
return buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE, nodeFlagsFieldPos);
}
// Note that parentOffset is offset from node's head position.
-/* static */ bool DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
int *const parentPosFieldPos) {
return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
}
-/* static */ bool DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int *const codePoints,
const int codePointCount, int *const codePointFieldPos) {
if (codePointCount <= 0) {
@@ -100,20 +101,34 @@ const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
hasMultipleCodePoints, codePointFieldPos);
}
-/* static */ bool DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int probability,
+ int *const probabilityFieldPos) {
+ if (probability < 0 || probability > MAX_PROBABILITY) {
+ AKLOGI("probability cannot be written because the probability is invalid: %d",
+ probability);
+ ASSERT(false);
+ return false;
+ }
+ return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE,
+ probabilityFieldPos);
+}
+
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
int *const childrenPositionFieldPos) {
return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
childrenPositionFieldPos);
}
-/* static */ bool DynamicPtWritingUtils::writeDictOffset(BufferWithExtendableBuffer *const buffer,
- const int targetPos, const int basePos, int *const offsetFieldPos) {
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
+ int *const offsetFieldPos) {
int offset = targetPos - basePos;
if (targetPos == NOT_A_DICT_POS) {
- offset = DynamicPtReadingUtils::DICT_OFFSET_INVALID;
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
} else if (offset == 0) {
- offset = DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
}
if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
AKLOGI("offset cannot be written because the offset is too large or too small: %d",
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
index 362fbd1cc..af76bc6b5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
@@ -14,19 +14,19 @@
* limitations under the License.
*/
-#ifndef LATINIME_DYNAMIC_PT_WRITING_UTILS_H
-#define LATINIME_DYNAMIC_PT_WRITING_UTILS_H
+#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
+#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H
#include <cstddef>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
namespace latinime {
class BufferWithExtendableBuffer;
-class DynamicPtWritingUtils {
+class DynamicPatriciaTrieWritingUtils {
public:
static const int NODE_FLAG_FIELD_SIZE;
@@ -39,15 +39,8 @@ class DynamicPtWritingUtils {
static bool writePtNodeArraySizeAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const size_t arraySize, int *const arraySizeFieldPos);
- static bool writeFlags(BufferWithExtendableBuffer *const buffer,
- const DynamicPtReadingUtils::NodeFlags nodeFlags,
- const int nodeFlagsFieldPos) {
- int writingPos = nodeFlagsFieldPos;
- return writeFlagsAndAdvancePosition(buffer, nodeFlags, &writingPos);
- }
-
static bool writeFlagsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const DynamicPtReadingUtils::NodeFlags nodeFlags,
+ const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
int *const nodeFlagsFieldPos);
static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
@@ -56,11 +49,14 @@ class DynamicPtWritingUtils {
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
+ static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int probability, int *const probabilityFieldPos);
+
static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int childrenPosition, int *const childrenPositionFieldPos);
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtWritingUtils);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingUtils);
static const size_t MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD;
static const size_t MAX_PTNODE_ARRAY_SIZE;
@@ -71,9 +67,10 @@ class DynamicPtWritingUtils {
static const int MAX_DICT_OFFSET_VALUE;
static const int MIN_DICT_OFFSET_VALUE;
static const int DICT_OFFSET_NEGATIVE_FLAG;
+ static const int PROBABILITY_FIELD_SIZE;
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
const int basePos, int *const offsetFieldPos);
};
} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PT_WRITING_UTILS_H */
+#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index be7a3c228..eb072fbaf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -20,8 +20,6 @@ namespace latinime {
// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
-const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
- "REQUIRES_GERMAN_UMLAUT_PROCESSING";
// TODO: Change attribute string to "IS_DECAYING_DICT".
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
@@ -29,9 +27,6 @@ const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
-// Historical info is information that is needed to support decaying such as timestamp, level and
-// count.
-const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
@@ -68,11 +63,6 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const {
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
}
-bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
- return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
- REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
-}
-
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
const int unigramCount, const int bigramCount, const int extendedRegionSize) const {
@@ -99,12 +89,12 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
if (updatesLastUpdatedTime) {
// Set current time as a last updated time.
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
- TimeKeeper::peekCurrentTime());
+ time(0));
}
if (updatesLastDecayedTime) {
// Set current time as a last decayed time.
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY,
- TimeKeeper::peekCurrentTime());
+ time(0));
}
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
&writingPos)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 1208d2c2a..a9c7805a8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -17,40 +17,37 @@
#ifndef LATINIME_HEADER_POLICY_H
#define LATINIME_HEADER_POLICY_H
+#include <ctime>
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "utils/time_keeper.h"
namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
// Reads information from existing dictionary buffer.
- HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
- : mDictFormatVersion(formatVersion),
+ HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
+ : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
- mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
- mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
- &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
+ EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {}
// Constructs header information using an attribute map.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -59,55 +56,30 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
- mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
+ LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
- mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
- mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
- &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {}
-
- // Temporary dummy header.
- HeaderPolicy()
- : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
- mAttributeMap(), mMultiWordCostMultiplier(0.0f),
- mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
- mLastUpdatedTime(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
- mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false) {}
+ LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
~HeaderPolicy() {}
- virtual int getFormatVersionNumber() const {
- // Conceptually this converts the symbolic value we use in the code into the
- // hardcoded of the bytes in the file. But we want the constants to be the
- // same so we use them for both here.
- switch (mDictFormatVersion) {
- case FormatUtils::VERSION_2:
- return FormatUtils::VERSION_2;
- case FormatUtils::VERSION_4:
- return FormatUtils::VERSION_4;
- default:
- return FormatUtils::UNKNOWN_VERSION;
- }
+ AK_FORCE_INLINE int getSize() const {
+ return mSize;
}
- AK_FORCE_INLINE bool isValid() const {
- // Decaying dictionary must have historical information.
- if (!mIsDecayingDict) {
- return true;
- }
- if (mHasHistoricalInfoOfWords) {
- return true;
- } else {
- return false;
- }
+ AK_FORCE_INLINE bool supportsDynamicUpdate() const {
+ return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags);
}
- AK_FORCE_INLINE int getSize() const {
- return mSize;
+ AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
+ return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags);
+ }
+
+ AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const {
+ return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags);
}
AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
@@ -118,10 +90,6 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mIsDecayingDict;
}
- AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
- return mRequiresGermanUmlautProcessing;
- }
-
AK_FORCE_INLINE int getLastUpdatedTime() const {
return mLastUpdatedTime;
}
@@ -142,10 +110,6 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize;
}
- AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
- return mHasHistoricalInfoOfWords;
- }
-
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
@@ -154,17 +118,15 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int unigramCount, const int bigramCount, const int extendedRegionSize) const;
private:
- DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
- static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
static const char *const LAST_DECAYED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
static const char *const EXTENDED_REGION_SIZE_KEY;
- static const char *const HAS_HISTORICAL_INFO_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
@@ -173,17 +135,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mSize;
HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
- const bool mRequiresGermanUmlautProcessing;
const bool mIsDecayingDict;
const int mLastUpdatedTime;
const int mLastDecayedTime;
const int mUnigramCount;
const int mBigramCount;
const int mExtendedRegionSize;
- const bool mHasHistoricalInfoOfWords;
float readMultipleWordCostMultiplier() const;
- bool readRequiresGermanUmlautProcessing() const;
static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 6b4598642..5ded8f6a1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -35,6 +35,22 @@ const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
+// Flags for special processing
+// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
+// something very bad (like, the apocalypse) will happen. Please update both at the same time.
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2;
+const HeaderReadWriteUtils::DictionaryFlags
+ HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+
+// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
+const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
+const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
+ "REQUIRES_GERMAN_UMLAUT_PROCESSING";
+const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
+ "REQUIRES_FRENCH_LIGATURE_PROCESSING";
/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
// See the format of the header in the comment in
@@ -52,7 +68,17 @@ const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
/* static */ HeaderReadWriteUtils::DictionaryFlags
HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- return NO_FLAGS;
+    // Build the flag word from the boolean header attributes. Every lookup passes false as
+    // its default value, so only attributes that read back as true contribute a bit.
+    DictionaryFlags flags = NO_FLAGS;
+    if (readBoolAttributeValue(attributeMap, REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY,
+            false /* defaultValue */)) {
+        flags |= GERMAN_UMLAUT_PROCESSING_FLAG;
+    }
+    if (readBoolAttributeValue(attributeMap, REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY,
+            false /* defaultValue */)) {
+        flags |= FRENCH_LIGATURE_PROCESSING_FLAG;
+    }
+    if (readBoolAttributeValue(attributeMap, SUPPORTS_DYNAMIC_UPDATE_KEY,
+            false /* defaultValue */)) {
+        flags |= SUPPORTS_DYNAMIC_UPDATE_FLAG;
+    }
+    return flags;
}
/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
@@ -89,8 +115,8 @@ const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
case FormatUtils::VERSION_2:
// Version 2 dictionary writing is not supported.
return false;
- case FormatUtils::VERSION_4:
- return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */,
+ case FormatUtils::VERSION_3:
+ return buffer->writeUintAndAdvancePosition(3 /* data */,
HEADER_DICTIONARY_VERSION_SIZE, writingPos);
default:
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index fc24bbdd5..225968323 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -37,6 +37,18 @@ class HeaderReadWriteUtils {
static DictionaryFlags getFlags(const uint8_t *const dictBuf);
+    // Tells whether the SUPPORTS_DYNAMIC_UPDATE_FLAG bit is set in |flags|.
+    static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
+        return 0 != (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG);
+    }
+
+    // Tells whether the GERMAN_UMLAUT_PROCESSING_FLAG bit is set in |flags|.
+    static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) {
+        return 0 != (flags & GERMAN_UMLAUT_PROCESSING_FLAG);
+    }
+
+    // Tells whether the FRENCH_LIGATURE_PROCESSING_FLAG bit is set in |flags|.
+    static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) {
+        return 0 != (flags & FRENCH_LIGATURE_PROCESSING_FLAG);
+    }
+
static AK_FORCE_INLINE int getHeaderOptionsPosition() {
return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE
+ HEADER_SIZE_FIELD_SIZE;
@@ -89,8 +101,17 @@ class HeaderReadWriteUtils {
static const int HEADER_FLAG_SIZE;
static const int HEADER_SIZE_FIELD_SIZE;
- // Value for the "flags" field. It's unused at the moment.
static const DictionaryFlags NO_FLAGS;
+ // Flags for special processing
+ // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or
+ // something very bad (like, the apocalypse) will happen. Please update both at the same time.
+ static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
+ static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
+ static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
+
+ static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
+ static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
static void setIntAttributeInner(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int value);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index 960c1b936..8a84bd261 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -15,22 +15,22 @@
*/
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
-void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
+void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
- int nextPos = dicNode->getChildrenPtNodeArrayPos();
+ int nextPos = dicNode->getChildrenPos();
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize);
@@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each PtNode array, we search
+// it is possible to check for this with advantageous complexity. For each node, we search
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
-// than the position we look for, and we have to descend the z PtNode).
+// than the position we look for, and we have to descend the z node).
/* Parameters :
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
@@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
- // only traverse PtNodes that are actually a part of the terminal we are searching, so each
- // time we enter this loop we are one depth level further than last time.
- // The only reason we count PtNodes is because we want to reduce the probability of infinite
+ // only traverse nodes that are actually a part of the terminal we are searching, so each time
+ // we enter this loop we are one depth level further than last time.
+ // The only reason we count nodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
@@ -140,9 +140,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = true;
} else if (1 >= ptNodeCount) {
// However if we are on the LAST PtNode of this array, and we have NOT shot the
- // position we should descend THIS PtNode. So we trick the
- // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
- // one.
+ // position we should descend THIS node. So we trick the lastCandidatePtNodePos
+ // so that we will descend this PtNode, not the previous one.
lastCandidatePtNodePos = startPos;
found = true;
} else {
@@ -150,7 +149,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = false;
}
} else {
- // Even if we don't have children here, we could still be on the last PtNode of
+ // Even if we don't have children here, we could still be on the last PtNode of
// this array. If this is the case, we should descend the last PtNode that had
// children, and their position is already in lastCandidatePtNodePos.
found = (1 >= ptNodeCount);
@@ -231,9 +230,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
return 0;
}
-// This function gets the position of the terminal PtNode of the exact matching word in the
+// This function gets the position of the terminal node of the exact matching word in the
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
-int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
+int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition();
int wordPos = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 2adafd22b..0f8662aea 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -24,7 +24,6 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -34,26 +33,28 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
- : mMmappedBuffer(mmappedBuffer),
- mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_2),
- mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
- - mHeaderPolicy.getSize()),
+    // Wraps a mapped dictionary buffer: the header is parsed from the start of the buffer and
+    // the dictionary body (mDictRoot) starts right after the header. The destructor of this
+    // class deletes |buffer|.
+    // Declared explicit so that a MmappedBuffer pointer never converts into a policy silently.
+    explicit PatriciaTriePolicy(const MmappedBuffer *const buffer)
+            : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
+              mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
+              mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
+ ~PatriciaTriePolicy() {
+ delete mBuffer;
+ }
+
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
- void createAndGetAllChildDicNodes(const DicNode *const dicNode,
+ void createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
- int getTerminalPtNodePositionOfWord(const int *const inWord,
+ int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
@@ -76,17 +77,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return &mShortcutListPolicy;
}
- bool addUnigramWord(const int *const word, const int length, const int probability,
- const int *const shortcutTargetCodePoints, const int shortcutLength,
- const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
- const int timestamp) {
+ bool addUnigramWord(const int *const word, const int length, const int probability) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) {
+ const int length1, const int probability) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
@@ -115,7 +113,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- void getProperty(const char *const query, const int queryLength, char *const outResult,
+ void getProperty(const char *const query, char *const outResult,
const int maxResultLength) {
// getProperty is not supported for this class.
if (maxResultLength > 0) {
@@ -123,16 +121,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
}
- const UnigramProperty getUnigramProperty(const int *const codePoints,
- const int codePointCount) const {
- // getUnigramProperty is not supported.
- return UnigramProperty();
- }
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
+ const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const int mDictBufferSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
index 82b3593c8..7df55815f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
index b28f58336..8420ee95a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -23,7 +23,6 @@
namespace latinime {
-// TODO: Move to pt_common
class PatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
new file mode 100644
index 000000000..bd3211f6a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+/*
+ * This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
+ */
+class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+    // |buffer| is only stored; this class never deletes it.
+    explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
+            : mBuffer(buffer) {}
+
+    ~DynamicShortcutListPolicy() {}
+
+    // Returns the position just after the shortcut list size field that starts at |pos|, or
+    // NOT_A_DICT_POS when |pos| itself is NOT_A_DICT_POS.
+    int getStartPos(const int pos) const {
+        if (pos == NOT_A_DICT_POS) {
+            return NOT_A_DICT_POS;
+        }
+        return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
+    }
+
+    // Reads the shortcut entry at *pos and advances *pos past what was read.
+    // outHasNext, outIsWhitelist, outCodePoint and outCodePointCount may each be null. The
+    // shortcut target is only read when outCodePoint is non-null; otherwise *pos is advanced
+    // past the flags only.
+    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+            int *const pos) const {
+        const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+        const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+        if (usesAdditionalBuffer) {
+            // Make the position relative to the additional buffer while reading from it.
+            *pos -= mBuffer->getOriginalBufferSize();
+        }
+        const ShortcutListReadingUtils::ShortcutFlags flags =
+                ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
+        if (outHasNext) {
+            *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+        }
+        if (outIsWhitelist) {
+            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+        }
+        if (outCodePoint) {
+            const int codePointCount = ShortcutListReadingUtils::readShortcutTarget(
+                    buffer, maxCodePointCount, outCodePoint, pos);
+            // The count is optional like the other outputs; never write through a null pointer.
+            if (outCodePointCount) {
+                *outCodePointCount = codePointCount;
+            }
+        }
+        if (usesAdditionalBuffer) {
+            // Translate the position back before handing it to the caller.
+            *pos += mBuffer->getOriginalBufferSize();
+        }
+    }
+
+    // Advances *pos from the head of a shortcut list to the position right after the list.
+    void skipAllShortcuts(int *const pos) const {
+        const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+        const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+        if (usesAdditionalBuffer) {
+            *pos -= mBuffer->getOriginalBufferSize();
+        }
+        const int shortcutListSize = ShortcutListReadingUtils
+                ::getShortcutListSizeAndForwardPointer(buffer, pos);
+        *pos += shortcutListSize;
+        if (usesAdditionalBuffer) {
+            *pos += mBuffer->getOriginalBufferSize();
+        }
+    }
+
+    // Copies the shortcut list that starts at *fromPos in mBuffer to *toPos in bufferToWrite
+    // and advances both positions past the list. Returns whether the copy succeeded.
+    // When a write fails this returns false immediately: the positions are left where the
+    // copy stopped and *fromPos is not translated back from the additional buffer.
+    bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
+            int *const fromPos, int *const toPos) const {
+        const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
+        const uint8_t *const sourceBuffer = mBuffer->getBuffer(usesAdditionalBuffer);
+        if (usesAdditionalBuffer) {
+            *fromPos -= mBuffer->getOriginalBufferSize();
+        }
+        const int shortcutListSize = ShortcutListReadingUtils
+                ::getShortcutListSizeAndForwardPointer(sourceBuffer, fromPos);
+        // Copy shortcut list size. The size field width is added back before writing;
+        // presumably the stored size counts the size field itself -- TODO confirm against
+        // ShortcutListReadingUtils.
+        if (!bufferToWrite->writeUintAndAdvancePosition(
+                shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
+                ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
+            return false;
+        }
+        // Copy shortcut list one byte at a time.
+        for (int i = 0; i < shortcutListSize; ++i) {
+            const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
+                    sourceBuffer, fromPos);
+            if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
+                return false;
+            }
+        }
+        if (usesAdditionalBuffer) {
+            *fromPos += mBuffer->getOriginalBufferSize();
+        }
+        return true;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
+
+    const BufferWithExtendableBuffer *const mBuffer;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
deleted file mode 100644
index ae863af57..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
-#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-
-namespace latinime {
-
-class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
- public:
- Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
- const TerminalPositionLookupTable *const terminalPositionLookupTable)
- : mShortcutDictContent(shortcutDictContent),
- mTerminalPositionLookupTable(terminalPositionLookupTable) {}
-
- ~Ver4ShortcutListPolicy() {}
-
- int getStartPos(const int pos) const {
- // The first shortcut entry is located at the head position of the shortcut list.
- return pos;
- }
-
- void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
- int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
- int *const pos) const {
- int probability = 0;
- mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
- outCodePoint, outCodePointCount, &probability, outHasNext, pos);
- if (outIsWhitelist) {
- *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
- }
- }
-
- void skipAllShortcuts(int *const pos) const {
- // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
- }
-
- bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
- const int probability) {
- const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
- if (shortcutListPos == NOT_A_DICT_POS) {
- // Create shortcut list.
- if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
- AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
- return false;
- }
- const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
- return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
- false /* hasNext */, writingPos);
- }
- const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
- codePoints, codePointCount);
- if (entryPos == NOT_A_DICT_POS) {
- // Add new entry to the shortcut list.
- // Create new shortcut list.
- if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
- AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
- return false;
- }
- int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
- if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
- codePointCount, probability, true /* hasNext */, &writingPos)) {
- AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
- writingPos);
- return false;
- }
- return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
- }
- // Overwrite existing entry.
- bool hasNext = false;
- mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */,
- 0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos);
- if (!mShortcutDictContent->writeShortcutEntry(codePoints,
- codePointCount, probability, hasNext, entryPos)) {
- AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
- entryPos);
- return false;
- }
- return true;
- }
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
-
- ShortcutDictContent *const mShortcutDictContent;
- const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
-};
-} // namespace latinime
-#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
deleted file mode 100644
index c81c61d23..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
-
-#include <climits>
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
-#include "suggest/policyimpl/dictionary/utils/file_utils.h"
-#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path,
- const int bufOffset, const int size, const bool isUpdatable) {
- if (FileUtils::existsDir(path)) {
- // Given path represents a directory.
- return newPolicyforDirectoryDict(path, isUpdatable);
- } else {
- if (isUpdatable) {
- AKLOGE("One file dictionaries don't support updating. path: %s", path);
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
- }
- return newPolicyforFileDict(path, bufOffset, size);
- }
-}
-
-/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- DictionaryStructureWithBufferPolicyFactory::newPolicyforDirectoryDict(
- const char *const path, const bool isUpdatable) {
- const int headerFilePathBufSize = PATH_MAX + 1 /* terminator */;
- char headerFilePath[headerFilePathBufSize];
- getHeaderFilePathInDictDir(path, headerFilePathBufSize, headerFilePath);
- // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
- // MmappedBufferPtr if the instance has the responsibility.
- MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(headerFilePath,
- isUpdatable);
- if (!mmappedBuffer.get()) {
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
- }
- switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(),
- mmappedBuffer.get()->getBufferSize())) {
- case FormatUtils::VERSION_2:
- AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
- break;
- case FormatUtils::VERSION_4: {
- const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */;
- char dictPath[dictDirPathBufSize];
- if (!FileUtils::getFilePathWithoutSuffix(headerFilePath,
- Ver4DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) {
- AKLOGE("Dictionary file name is not valid as a ver4 dictionary. path: %s", path);
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
- }
- const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
- Ver4DictBuffers::openVer4DictBuffers(dictPath, mmappedBuffer);
- if (!dictBuffers.get()->isValid()) {
- AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s",
- path);
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
- }
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
- new Ver4PatriciaTriePolicy(dictBuffers));
- }
- default:
- AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
- break;
- }
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
-}
-
-/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr
- DictionaryStructureWithBufferPolicyFactory::newPolicyforFileDict(
- const char *const path, const int bufOffset, const int size) {
- // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
- // MmappedBufferPtr if the instance has the responsibility.
- MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset,
- size, false /* isUpdatable */);
- if (!mmappedBuffer.get()) {
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
- }
- switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(),
- mmappedBuffer.get()->getBufferSize())) {
- case FormatUtils::VERSION_2:
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
- new PatriciaTriePolicy(mmappedBuffer));
- case FormatUtils::VERSION_4:
- AKLOGE("Given path is a file but the format is version 4. path: %s", path);
- break;
- default:
- AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path);
- break;
- }
- ASSERT(false);
- return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0);
-}
-
-/* static */ void DictionaryStructureWithBufferPolicyFactory::getHeaderFilePathInDictDir(
- const char *const dictDirPath, const int outHeaderFileBufSize,
- char *const outHeaderFilePath) {
- const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
- char dictName[dictNameBufSize];
- FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
- snprintf(outHeaderFilePath, outHeaderFileBufSize, "%s/%s%s", dictDirPath,
- dictName, Ver4DictConstants::HEADER_FILE_EXTENSION);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
deleted file mode 100644
index 8f42df6d2..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
-
-#include "suggest/core/policy/dictionary_header_structure_policy.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
-
-namespace latinime {
-
-bool DynamicPtGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
- // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
- // children.
- bool isUselessPtNode = !ptNodeParams->isTerminal();
- if (ptNodeParams->isTerminal()) {
- bool needsToKeepPtNode = true;
- if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
- &needsToKeepPtNode)) {
- AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC.");
- return false;
- }
- if (!needsToKeepPtNode) {
- isUselessPtNode = true;
- }
- }
- if (mChildrenValue > 0) {
- isUselessPtNode = false;
- } else if (ptNodeParams->isTerminal()) {
- // Remove children as all children are useless.
- if (!mPtNodeWriter->updateChildrenPosition(ptNodeParams,
- NOT_A_DICT_POS /* newChildrenPosition */)) {
- return false;
- }
- }
- if (isUselessPtNode) {
- // Current PtNode is no longer needed. Mark it as deleted.
- if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) {
- return false;
- }
- } else {
- mValueStack.back() += 1;
- if (ptNodeParams->isTerminal()) {
- mValidUnigramCount += 1;
- }
- }
- return true;
-}
-
-bool DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
- ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
- if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) {
- int bigramEntryCount = 0;
- if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
- &bigramEntryCount)) {
- return false;
- }
- mValidBigramEntryCount += bigramEntryCount;
- }
- return true;
-}
-
-// Writes dummy PtNode array size when the head of PtNode array is read.
-bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onDescend(const int ptNodeArrayPos) {
- mValidPtNodeCount = 0;
- int writingPos = mBufferToWrite->getTailPosition();
- mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
- PtNodeWriter::PtNodeArrayPositionRelocationMap::value_type(ptNodeArrayPos, writingPos));
- // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
- // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
- mPtNodeArraySizeFieldPos = writingPos;
- return DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
- mBufferToWrite, 0 /* arraySize */, &writingPos);
-}
-
-// Write PtNode array terminal and actual PtNode array size.
-bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onReadingPtNodeArrayTail() {
- int writingPos = mBufferToWrite->getTailPosition();
- // Write PtNode array terminal.
- if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
- mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Write actual PtNode array size.
- if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
- mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
- return false;
- }
- return true;
-}
-
-// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
-bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
- if (ptNodeParams->isDeleted()) {
- // Current PtNode is not written in new buffer because it has been deleted.
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
- PtNodeWriter::PtNodePositionRelocationMap::value_type(
- ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
- return true;
- }
- int writingPos = mBufferToWrite->getTailPosition();
- mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
- PtNodeWriter::PtNodePositionRelocationMap::value_type(
- ptNodeParams->getHeadPos(), writingPos));
- mValidPtNodeCount++;
- // Writes current PtNode.
- return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos);
-}
-
-bool DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
- ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
- // Updates parent position.
- int bigramCount = 0;
- if (!mPtNodeWriter->updateAllPositionFields(ptNodeParams, mDictPositionRelocationMap,
- &bigramCount)) {
- return false;
- }
- mBigramCount += bigramCount;
- if (ptNodeParams->isTerminal()) {
- mUnigramCount++;
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
deleted file mode 100644
index 2457b49c8..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
-
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
-
-bool DynamicPtUpdatingHelper::addUnigramWord(
- DynamicPtReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- const bool isNotAWord, const bool isBlacklisted, const int timestamp,
- bool *const outAddedNewUnigram) {
- int parentPos = NOT_A_DICT_POS;
- while (!readingHelper->isEnd()) {
- const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
- if (!ptNodeParams.isValid()) {
- break;
- }
- const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
- if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
- wordCodePoints[matchedCodePointCount])) {
- // The first code point is different from target code point. Skip this node and read
- // the next sibling node.
- readingHelper->readNextSiblingNode(ptNodeParams);
- continue;
- }
- // Check following merged node code points.
- const int nodeCodePointCount = ptNodeParams.getCodePointCount();
- for (int j = 1; j < nodeCodePointCount; ++j) {
- const int nextIndex = matchedCodePointCount + j;
- if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
- wordCodePoints[matchedCodePointCount + j])) {
- *outAddedNewUnigram = true;
- return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
- probability, timestamp, wordCodePoints + matchedCodePointCount,
- codePointCount - matchedCodePointCount);
- }
- }
- // All characters are matched.
- if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
- return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
- timestamp, outAddedNewUnigram);
- }
- if (!ptNodeParams.hasChildren()) {
- *outAddedNewUnigram = true;
- return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
- isNotAWord, isBlacklisted, probability, timestamp,
- wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
- codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
- }
- // Advance to the children nodes.
- parentPos = ptNodeParams.getHeadPos();
- readingHelper->readChildNode(ptNodeParams);
- }
- if (readingHelper->isError()) {
- // The dictionary is invalid.
- return false;
- }
- int pos = readingHelper->getPosOfLastForwardLinkField();
- *outAddedNewUnigram = true;
- return createAndInsertNodeIntoPtNodeArray(parentPos,
- wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
- codePointCount - readingHelper->getPrevTotalCodePointCount(),
- isNotAWord, isBlacklisted, probability, timestamp, &pos);
-}
-
-bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability, const int timestamp, bool *const outAddedNewBigram) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
- timestamp, outAddedNewBigram);
-}
-
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
-}
-
-bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
- const int *const targetCodePoints, const int targetCodePointCount,
- const int shortcutProbability) {
- const PtNodeParams ptNodeParams(mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(wordPos));
- return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount,
- shortcutProbability);
-}
-
-bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
- const int *const nodeCodePoints, const int nodeCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, int *const forwardLinkFieldPos) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- newPtNodeArrayPos, forwardLinkFieldPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
- isNotAWord, isBlacklisted, probability, timestamp);
-}
-
-bool DynamicPtUpdatingHelper::setPtNodeProbability(
- const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp,
- bool *const outAddedNewUnigram) {
- if (originalPtNodeParams->isTerminal()) {
- // Overwrites the probability.
- *outAddedNewUnigram = false;
- return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp);
- } else {
- // Make the node terminal and write the probability.
- *outAddedNewUnigram = true;
- const int movedPos = mBuffer->getTailPosition();
- int writingPos = movedPos;
- const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
- isNotAWord, isBlacklisted, true /* isTerminal */,
- originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
- originalPtNodeParams->getCodePoints(), probability));
- if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
- timestamp, &writingPos)) {
- return false;
- }
- if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
- return false;
- }
- }
- return true;
-}
-
-bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
- const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp,
- const int *const codePoints, const int codePointCount) {
- const int newPtNodeArrayPos = mBuffer->getTailPosition();
- if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
- return false;
- }
- return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
- codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
-}
-
-bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
- const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp) {
- int writingPos = mBuffer->getTailPosition();
- if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- 1 /* arraySize */, &writingPos)) {
- return false;
- }
- const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- isNotAWord, isBlacklisted, true /* isTerminal */,
- parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
- if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
- &writingPos)) {
- return false;
- }
- if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- return true;
-}
-
-// Returns whether the dictionary updating was succeeded or not.
-bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
- const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
- const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
- // When addsExtraChild is true, split the reallocating PtNode and add new child.
- // Reallocating PtNode: abcde, newNode: abcxy.
- // abc (1st, not terminal) __ de (2nd)
- // \_ xy (extra child, terminal)
- // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
- // Reallocating PtNode: abcde, newNode: abc.
- // abc (1st, terminal) __ de (2nd)
- const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
- const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
- int writingPos = firstPartOfReallocatedPtNodePos;
- // Write the 1st part of the reallocating node. The children position will be updated later
- // with actual children position.
- if (addsExtraChild) {
- const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
- reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
- reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
- if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
- return false;
- }
- } else {
- const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
- isNotAWord, isBlacklisted, true /* isTerminal */,
- reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
- reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
- if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
- timestamp, &writingPos)) {
- return false;
- }
- }
- const int actualChildrenPos = writingPos;
- // Create new children PtNode array.
- const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
- if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
- newPtNodeCount, &writingPos)) {
- return false;
- }
- // Write the 2nd part of the reallocating node.
- const int secondPartOfReallocatedPtNodePos = writingPos;
- const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
- reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
- reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
- reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
- reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
- reallocatingPtNodeParams->getProbability()));
- if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
- return false;
- }
- if (addsExtraChild) {
- const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
- isNotAWord, isBlacklisted, true /* isTerminal */,
- firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
- newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
- if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
- timestamp, &writingPos)) {
- return false;
- }
- }
- if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
- NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
- return false;
- }
- // Update original reallocating PtNode as moved.
- if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
- secondPartOfReallocatedPtNodePos)) {
- return false;
- }
- // Load node info. Information of the 1st part will be fetched.
- const PtNodeParams ptNodeParams(
- mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
- // Update children position.
- return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
-}
-
-const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
- const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const bool isTerminal, const int parentPos,
- const int codePointCount, const int *const codePoints, const int probability) const {
- const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
- isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
- originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */,
- CHILDREN_POSITION_FIELD_SIZE);
- return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
- probability);
-}
-
-const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
- const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
- const int parentPos, const int codePointCount, const int *const codePoints,
- const int probability) const {
- const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
- isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
- false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
- CHILDREN_POSITION_FIELD_SIZE);
- return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
deleted file mode 100644
index 71f473096..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
-#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class DynamicPtReadingHelper;
-class PtNodeReader;
-class PtNodeWriter;
-
-class DynamicPtUpdatingHelper {
- public:
- DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer,
- const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
- : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
-
- ~DynamicPtUpdatingHelper() {}
-
- // Add a word to the dictionary. If the word already exists, update the probability.
- bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability,
- const bool isNotAWord, const bool isBlacklisted, const int timestamp,
- bool *const outAddedNewUnigram);
-
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
- const int timestamp, bool *const outAddedNewBigram);
-
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
-
- // Add a shortcut target.
- bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
- const int targetCodePointCount, const int shortcutProbability);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
-
- static const int CHILDREN_POSITION_FIELD_SIZE;
-
- BufferWithExtendableBuffer *const mBuffer;
- const PtNodeReader *const mPtNodeReader;
- PtNodeWriter *const mPtNodeWriter;
-
- bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
- const int probability, const int timestamp, int *const forwardLinkFieldPos);
-
- bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
- const bool isBlacklisted, const int probability, const int timestamp,
- bool *const outAddedNewUnigram);
-
- bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
- const bool isNotAWord, const bool isBlacklisted, const int probability,
- const int timestamp, const int *const codePoints, const int codePointCount);
-
- bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
- const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
- const int probability, const int timestamp);
-
- bool reallocatePtNodeAndAddNewPtNodes(
- const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
- const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
- const int timestamp, const int *const newNodeCodePoints,
- const int newNodeCodePointCount);
-
- const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
- const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
- const int parentPos, const int codePointCount,
- const int *const codePoints, const int probability) const;
-
- const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
- const bool isTerminal, const int parentPos,
- const int codePointCount, const int *const codePoints, const int probability) const;
-};
-} // namespace latinime
-#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
deleted file mode 100644
index 84731eb17..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PT_NODE_PARAMS_H
-#define LATINIME_PT_NODE_PARAMS_H
-
-#include <cstring>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-
-namespace latinime {
-
-// This class has information of a PtNode. This class is immutable.
-class PtNodeParams {
- public:
- // Invalid PtNode.
- PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
- mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
- mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
- mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
- mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_DICT_POS) {}
-
- PtNodeParams(const PtNodeParams& ptNodeParams)
- : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
- mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
- mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
- mTerminalId(ptNodeParams.mTerminalId),
- mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
- mProbability(ptNodeParams.mProbability),
- mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
- mChildrenPos(ptNodeParams.mChildrenPos),
- mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
- mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
- mSiblingPos(ptNodeParams.mSiblingPos) {
- memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
- }
-
- // PtNode with a terminal id.
- PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
- const int parentPos, const int codePointCount, const int *const codePoints,
- const int terminalIdFieldPos, const int terminalId, const int probability,
- const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
- : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
- mCodePointCount(codePointCount), mCodePoints(),
- mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
- mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
- mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
- mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
- mBigramPos(terminalId), mSiblingPos(siblingPos) {
- memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
- }
-
- // Construct new params by updating existing PtNode params.
- PtNodeParams(const PtNodeParams *const ptNodeParams,
- const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
- const int codePointCount, const int *const codePoints, const int probability)
- : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos),
- mCodePointCount(codePointCount), mCodePoints(),
- mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
- mTerminalId(ptNodeParams->getTerminalId()),
- mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
- mProbability(probability),
- mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
- mChildrenPos(ptNodeParams->getChildrenPos()),
- mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
- mShortcutPos(ptNodeParams->getShortcutPos()),
- mBigramPos(ptNodeParams->getBigramsPos()),
- mSiblingPos(ptNodeParams->getSiblingNodePos()) {
- memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
- }
-
- PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
- const int codePointCount, const int *const codePoints, const int probability)
- : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos),
- mCodePointCount(codePointCount), mCodePoints(),
- mTerminalIdFieldPos(NOT_A_DICT_POS),
- mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
- mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
- mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
- mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
- mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
- memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
- }
-
- AK_FORCE_INLINE bool isValid() const {
- return mCodePointCount > 0;
- }
-
- // Head position of the PtNode
- AK_FORCE_INLINE int getHeadPos() const {
- return mHeadPos;
- }
-
- // Flags
- AK_FORCE_INLINE bool isDeleted() const {
- return DynamicPtReadingUtils::isDeleted(mFlags);
- }
-
- AK_FORCE_INLINE bool willBecomeNonTerminal() const {
- return DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
- }
-
- AK_FORCE_INLINE bool hasChildren() const {
- return mChildrenPos != NOT_A_DICT_POS;
- }
-
- AK_FORCE_INLINE bool isTerminal() const {
- return PatriciaTrieReadingUtils::isTerminal(mFlags);
- }
-
- AK_FORCE_INLINE bool isBlacklisted() const {
- return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
- }
-
- AK_FORCE_INLINE bool isNotAWord() const {
- return PatriciaTrieReadingUtils::isNotAWord(mFlags);
- }
-
- AK_FORCE_INLINE bool hasBigrams() const {
- return PatriciaTrieReadingUtils::hasBigrams(mFlags);
- }
-
- AK_FORCE_INLINE bool hasShortcutTargets() const {
- return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
- }
-
- // Parent node position
- AK_FORCE_INLINE int getParentPos() const {
- return mParentPos;
- }
-
- // Number of code points
- AK_FORCE_INLINE uint8_t getCodePointCount() const {
- return mCodePointCount;
- }
-
- AK_FORCE_INLINE const int *getCodePoints() const {
- return mCodePoints;
- }
-
- // Probability
- AK_FORCE_INLINE int getTerminalIdFieldPos() const {
- return mTerminalIdFieldPos;
- }
-
- AK_FORCE_INLINE int getTerminalId() const {
- return mTerminalId;
- }
-
- // Probability
- AK_FORCE_INLINE int getProbabilityFieldPos() const {
- return mProbabilityFieldPos;
- }
-
- AK_FORCE_INLINE int getProbability() const {
- return mProbability;
- }
-
- // Children PtNode array position
- AK_FORCE_INLINE int getChildrenPosFieldPos() const {
- return mChildrenPosFieldPos;
- }
-
- AK_FORCE_INLINE int getChildrenPos() const {
- return mChildrenPos;
- }
-
- // Bigram linked node position.
- AK_FORCE_INLINE int getBigramLinkedNodePos() const {
- return mBigramLinkedNodePos;
- }
-
- // Shortcutlist position
- AK_FORCE_INLINE int getShortcutPos() const {
- return mShortcutPos;
- }
-
- // Bigrams position
- AK_FORCE_INLINE int getBigramsPos() const {
- return mBigramPos;
- }
-
- // Sibling node position
- AK_FORCE_INLINE int getSiblingNodePos() const {
- return mSiblingPos;
- }
-
- private:
- // This class have a public copy constructor to be used as a return value.
-
- // Disallowing the assignment operator.
- PtNodeParams &operator=(PtNodeParams &ptNodeParams);
-
- const int mHeadPos;
- const PatriciaTrieReadingUtils::NodeFlags mFlags;
- const int mParentPos;
- const uint8_t mCodePointCount;
- int mCodePoints[MAX_WORD_LENGTH];
- const int mTerminalIdFieldPos;
- const int mTerminalId;
- const int mProbabilityFieldPos;
- const int mProbability;
- const int mChildrenPosFieldPos;
- const int mChildrenPos;
- const int mBigramLinkedNodePos;
- const int mShortcutPos;
- const int mBigramPos;
- const int mSiblingPos;
-};
-} // namespace latinime
-#endif /* LATINIME_PT_NODE_PARAMS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
deleted file mode 100644
index c6b2a8bed..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PT_NODE_READER_H
-#define LATINIME_PT_NODE_READER_H
-
-#include "defines.h"
-
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-
-namespace latinime {
-
-// Interface class used to read PtNode information.
-class PtNodeReader {
- public:
- virtual ~PtNodeReader() {}
- virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0;
-
- protected:
- PtNodeReader() {};
-
- private:
- DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
-};
-} // namespace latinime
-#endif /* LATINIME_PT_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
deleted file mode 100644
index 84dd6870e..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PT_NODE_WRITER_H
-#define LATINIME_PT_NODE_WRITER_H
-
-#include "defines.h"
-
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-// Interface class used to write PtNode information.
-class PtNodeWriter {
- public:
- typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
- typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
- struct DictPositionRelocationMap {
- public:
- DictPositionRelocationMap()
- : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
-
- PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
- PtNodePositionRelocationMap mPtNodePositionRelocationMap;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
- };
-
- virtual ~PtNodeWriter() {}
-
- virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
-
- virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int movedPos, const int bigramLinkedNodePos) = 0;
-
- virtual bool markPtNodeAsWillBecomeNonTerminal(
- const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
-
- virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int probability, const int timestamp) = 0;
-
- virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
- const PtNodeParams *const toBeUpdatedPtNodeParams,
- bool *const outNeedsToKeepPtNode) = 0;
-
- virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int newChildrenPosition) = 0;
-
- virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
- int *const ptNodeWritingPos) = 0;
-
- virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
- const int timestamp, int *const ptNodeWritingPos) = 0;
-
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
- bool *const outAddedNewBigram) = 0;
-
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam) = 0;
-
- virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
- const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
-
- virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const DictPositionRelocationMap *const dictPositionRelocationMap,
- int *const outBigramEntryCount) = 0;
-
- virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
- const int *const targetCodePoints, const int targetCodePointCount,
- const int shortcutProbability) = 0;
-
- protected:
- PtNodeWriter() {};
-
- private:
- DISALLOW_COPY_AND_ASSIGN(PtNodeWriter);
-};
-} // namespace latinime
-#endif /* LATINIME_PT_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
deleted file mode 100644
index cb9d450ec..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
- int *const bigramEntryPos) const {
- const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
- const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
- const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
- int probability = NOT_A_PROBABILITY;
- int timestamp = NOT_A_TIMESTAMP;
- int level = 0;
- int count = 0;
- if (mHasHistoricalInfo) {
- probability = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
- timestamp = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
- level = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
- count = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
- } else {
- probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
- }
- const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
- const int targetTerminalId =
- (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
- Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
- if (mHasHistoricalInfo) {
- const HistoricalInfo historicalInfo(timestamp, level, count);
- return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
- } else {
- return BigramEntry(hasNext, probability, targetTerminalId);
- }
-}
-
-bool BigramDictContent::writeBigramEntryAndAdvancePosition(
- const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
- BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
- const int bigramFlags = createAndGetBigramFlags(
- mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
- bigramEntryToWrite->hasNext());
- if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
- Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
- return false;
- }
- if (mHasHistoricalInfo) {
- if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
- bigramEntryToWrite->getProbability());
- return false;
- }
- const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
- historicalInfo->getTimeStamp());
- return false;
- }
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
- historicalInfo->getLevel());
- return false;
- }
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
- historicalInfo->getCount());
- return false;
- }
- }
- const int targetTerminalIdToWrite =
- (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
- Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
- bigramEntryToWrite->getTargetTerminalId();
- if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
- AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
- *entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
- return false;
- }
- return true;
-}
-
-bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
- int readingPos = bigramListPos;
- int writingPos = toPos;
- bool hasNext = true;
- while (hasNext) {
- const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
- AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const BigramDictContent *const originalBigramDictContent,
- int *const outBigramEntryCount) {
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const int originalBigramListPos =
- originalBigramDictContent->getBigramListHeadPos(it->first);
- if (originalBigramListPos == NOT_A_DICT_POS) {
- // This terminal does not have a bigram list.
- continue;
- }
- const int bigramListPos = getContentBuffer()->getTailPosition();
- int bigramEntryCount = 0;
- // Copy bigram list with GC from original content.
- if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
- terminalIdMap, &bigramEntryCount)) {
- AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
- originalBigramListPos, bigramListPos);
- return false;
- }
- if (bigramEntryCount == 0) {
- // All bigram entries are useless. This terminal does not have a bigram list.
- continue;
- }
- *outBigramEntryCount += bigramEntryCount;
- // Set bigram list position to the lookup table.
- if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
- AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
- it->second, bigramListPos);
- return false;
- }
- }
- return true;
-}
-
-// Returns whether GC for the bigram list was succeeded or not.
-bool BigramDictContent::runGCBigramList(const int bigramListPos,
- const BigramDictContent *const sourceBigramDictContent, const int toPos,
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- int *const outEntrycount) {
- bool hasNext = true;
- int readingPos = bigramListPos;
- int writingPos = toPos;
- int lastEntryPos = NOT_A_DICT_POS;
- while (hasNext) {
- const BigramEntry originalBigramEntry =
- sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = originalBigramEntry.hasNext();
- if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
- continue;
- }
- TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
- terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
- if (it == terminalIdMap->end()) {
- // Target word has been removed.
- continue;
- }
- lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
- const BigramEntry updatedBigramEntry =
- originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
- if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
- AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
- return false;
- }
- *outEntrycount += 1;
- }
- if (lastEntryPos != NOT_A_DICT_POS) {
- // Update has next flag in the last written entry.
- const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
- false /* hasNext */);
- if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
- AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
deleted file mode 100644
index ba2a05209..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
-#define LATINIME_BIGRAM_DICT_CONTENT_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-
-namespace latinime {
-
-class BigramDictContent : public SparseTableDictContent {
- public:
- BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
- const bool isUpdatable)
- : SparseTableDictContent(dictPath,
- Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
- Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
- Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
- mHasHistoricalInfo(hasHistoricalInfo) {}
-
- BigramDictContent(const bool hasHistoricalInfo)
- : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
- mHasHistoricalInfo(hasHistoricalInfo) {}
-
- const BigramEntry getBigramEntry(const int bigramEntryPos) const {
- int readingPos = bigramEntryPos;
- return getBigramEntryAndAdvancePosition(&readingPos);
- }
-
- const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
-
- // Returns head position of bigram list for a PtNode specified by terminalId.
- int getBigramListHeadPos(const int terminalId) const {
- const SparseTable *const addressLookupTable = getAddressLookupTable();
- if (!addressLookupTable->contains(terminalId)) {
- return NOT_A_DICT_POS;
- }
- return addressLookupTable->get(terminalId);
- }
-
- bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
- int writingPos = entryWritingPos;
- return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
- }
-
- bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
- int *const entryWritingPos);
-
- bool createNewBigramList(const int terminalId) {
- const int bigramListPos = getContentBuffer()->getTailPosition();
- return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
- }
-
- bool copyBigramList(const int bigramListPos, const int toPos);
-
- bool flushToFile(const char *const dictPath) const {
- return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
- Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
- Ver4DictConstants::BIGRAM_FILE_EXTENSION);
- }
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const BigramDictContent *const originalBigramDictContent,
- int *const outBigramEntryCount);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
-
- int createAndGetBigramFlags(const int probability, const bool hasNext) const {
- return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
- | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
- }
-
- bool runGCBigramList(const int bigramListPos,
- const BigramDictContent *const sourceBigramDictContent, const int toPos,
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- int *const outEntryCount);
-
- bool mHasHistoricalInfo;
-};
-} // namespace latinime
-#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
deleted file mode 100644
index 2b0cbd93b..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_BIGRAM_ENTRY_H
-#define LATINIME_BIGRAM_ENTRY_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
-
-namespace latinime {
-
-class BigramEntry {
- public:
- BigramEntry(const BigramEntry& bigramEntry)
- : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
- mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
-
- // Entry with historical information.
- BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
- : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
- mTargetTerminalId(targetTerminalId) {}
-
- // Entry with historical information.
- BigramEntry(const bool hasNext, const int probability,
- const HistoricalInfo *const historicalInfo, const int targetTerminalId)
- : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
- mTargetTerminalId(targetTerminalId) {}
-
- const BigramEntry getInvalidatedEntry() const {
- return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
- }
-
- const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
- return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
- }
-
- const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
- return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
- }
-
- const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
- return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
- }
-
- const BigramEntry updateHistoricalInfoAndGetEntry(
- const HistoricalInfo *const historicalInfo) const {
- return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
- }
-
- bool isValid() const {
- return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
- }
-
- bool hasNext() const {
- return mHasNext;
- }
-
- int getProbability() const {
- return mProbability;
- }
-
- bool hasHistoricalInfo() const {
- return mHistoricalInfo.isValid();
- }
-
- const HistoricalInfo *getHistoricalInfo() const {
- return &mHistoricalInfo;
- }
-
- int getTargetTerminalId() const {
- return mTargetTerminalId;
- }
-
- private:
- // Copy constructor is public to use this class as a type of return value.
- DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
- DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
-
- const bool mHasNext;
- const int mProbability;
- const HistoricalInfo mHistoricalInfo;
- const int mTargetTerminalId;
-};
-} // namespace latinime
-#endif /* LATINIME_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
deleted file mode 100644
index 0c2f47073..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICT_CONTENT_H
-#define LATINIME_DICT_CONTENT_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class DictContent {
- public:
- virtual ~DictContent() {}
- virtual bool isValid() const = 0;
-
- protected:
- DictContent() {}
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictContent);
-};
-} // namespace latinime
-#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
deleted file mode 100644
index 3b7c70efd..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
- if (terminalId < 0 || terminalId >= mSize) {
- // This method can be called with invalid terminal id during GC.
- return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
- }
- const BufferWithExtendableBuffer *const buffer = getBuffer();
- int entryPos = getEntryPos(terminalId);
- const int flags = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
- const int probability = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
- if (mHasHistoricalInfo) {
- const int timestamp = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
- const int level = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
- const int count = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
- const HistoricalInfo historicalInfo(timestamp, level, count);
- return ProbabilityEntry(flags, probability, &historicalInfo);
- } else {
- return ProbabilityEntry(flags, probability);
- }
-}
-
-bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
- const ProbabilityEntry *const probabilityEntry) {
- if (terminalId < 0) {
- return false;
- }
- const int entryPos = getEntryPos(terminalId);
- if (terminalId >= mSize) {
- ProbabilityEntry dummyEntry;
- // Write new entry.
- int writingPos = getBuffer()->getTailPosition();
- while (writingPos <= entryPos) {
- // Fulfilling with dummy entries until writingPos.
- if (!writeEntry(&dummyEntry, writingPos)) {
- AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
- return false;
- }
- writingPos += getEntrySize();
- mSize++;
- }
- }
- return writeEntry(probabilityEntry, entryPos);
-}
-
-bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
- if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
- ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
- for (int i = 0; i < mSize; ++i) {
- const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
- if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
- return false;
- }
- }
- return probabilityDictContentToWrite.flush(dictPath,
- Ver4DictConstants::FREQ_FILE_EXTENSION);
- } else {
- return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
- }
-}
-
-bool ProbabilityDictContent::runGC(
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent) {
- mSize = 0;
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const ProbabilityEntry probabilityEntry =
- originalProbabilityDictContent->getProbabilityEntry(it->first);
- if (!setProbabilityEntry(it->second, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
- return false;
- }
- mSize++;
- }
- return true;
-}
-
-int ProbabilityDictContent::getEntrySize() const {
- if (mHasHistoricalInfo) {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE
- + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
- + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
- + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
- } else {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE;
- }
-}
-
-int ProbabilityDictContent::getEntryPos(const int terminalId) const {
- return terminalId * getEntrySize();
-}
-
-bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
- const int entryPos) {
- BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
- int writingPos = entryPos;
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
- AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
- AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (mHasHistoricalInfo) {
- const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
deleted file mode 100644
index b065bc954..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
-#define LATINIME_PROBABILITY_DICT_CONTENT_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-class ProbabilityEntry;
-
-class ProbabilityDictContent : public SingleDictContent {
- public:
- ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
- const bool isUpdatable)
- : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
- mHasHistoricalInfo(hasHistoricalInfo),
- mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
-
- ProbabilityDictContent(const bool hasHistoricalInfo)
- : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
-
- const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
-
- bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
-
- bool flushToFile(const char *const dictPath) const;
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
-
- int getEntrySize() const;
-
- int getEntryPos(const int terminalId) const;
-
- bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
-
- bool mHasHistoricalInfo;
- int mSize;
-};
-} // namespace latinime
-#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
deleted file mode 100644
index 36ba82be1..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PROBABILITY_ENTRY_H
-#define LATINIME_PROBABILITY_ENTRY_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
-
-namespace latinime {
-
-class ProbabilityEntry {
- public:
- ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
- : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
- mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
-
- // Dummy entry
- ProbabilityEntry()
- : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
-
- // Entry without historical information
- ProbabilityEntry(const int flags, const int probability)
- : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
-
- // Entry with historical information.
- ProbabilityEntry(const int flags, const int probability,
- const HistoricalInfo *const historicalInfo)
- : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
-
- const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
- return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
- }
-
- const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
- const HistoricalInfo *const historicalInfo) const {
- return ProbabilityEntry(mFlags, mProbability, historicalInfo);
- }
-
- bool hasHistoricalInfo() const {
- return mHistoricalInfo.isValid();
- }
-
- int getFlags() const {
- return mFlags;
- }
-
- int getProbability() const {
- return mProbability;
- }
-
- const HistoricalInfo *getHistoricalInfo() const {
- return &mHistoricalInfo;
- }
-
- private:
- // Copy constructor is public to use this class as a type of return value.
- DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
-
- const int mFlags;
- const int mProbability;
- const HistoricalInfo mHistoricalInfo;
-};
-} // namespace latinime
-#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
deleted file mode 100644
index 29972a4e8..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
- int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
- bool *const outhasNext, int *const shortcutEntryPos) const {
- const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
- const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
- Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
- if (outProbability) {
- *outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
- }
- if (outhasNext) {
- *outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
- }
- if (outCodePoint && outCodePointCount) {
- shortcutListBuffer->readCodePointsAndAdvancePosition(
- maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
- }
-}
-
-int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
- const SparseTable *const addressLookupTable = getAddressLookupTable();
- if (!addressLookupTable->contains(terminalId)) {
- return NOT_A_DICT_POS;
- }
- return addressLookupTable->get(terminalId);
-}
-
-bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
- return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
- Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
- Ver4DictConstants::SHORTCUT_FILE_EXTENSION);
-}
-
-bool ShortcutDictContent::runGC(
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ShortcutDictContent *const originalShortcutDictContent) {
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const int originalShortcutListPos =
- originalShortcutDictContent->getShortcutListHeadPos(it->first);
- if (originalShortcutListPos == NOT_A_DICT_POS) {
- continue;
- }
- const int shortcutListPos = getContentBuffer()->getTailPosition();
- // Copy shortcut list from original content.
- if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent,
- shortcutListPos)) {
- AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
- originalShortcutListPos, shortcutListPos);
- return false;
- }
- // Set shortcut list position to the lookup table.
- if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) {
- AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
- it->second, shortcutListPos);
- return false;
- }
- }
- return true;
-}
-
-bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
- const int shortcutListListPos = getContentBuffer()->getTailPosition();
- return getUpdatableAddressLookupTable()->set(terminalId, shortcutListListPos);
-}
-
-bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
- return copyShortcutListFromDictContent(shortcutListPos, this, toPos);
-}
-
-bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
- const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
- bool hasNext = true;
- int readingPos = shortcutListPos;
- int writingPos = toPos;
- int codePoints[MAX_WORD_LENGTH];
- while (hasNext) {
- int probability = 0;
- int codePointCount = 0;
- sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
- codePoints, &codePointCount, &probability, &hasNext, &readingPos);
- if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability,
- hasNext, &writingPos)) {
- AKLOGE("Cannot write shortcut entry to copy. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
- BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
- const int shortcutFlags = shortcutListBuffer->readUint(
- Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
- const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
- const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext);
- return shortcutListBuffer->writeUint(shortcutFlagsToWrite,
- Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
-}
-
-bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
- const int codePointCount, const int probability, const bool hasNext,
- int *const shortcutEntryPos) {
- BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
- const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext);
- if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags,
- Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
- AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos);
- return false;
- }
- if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount,
- true /* writesTerminator */, shortcutEntryPos)) {
- AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
- return false;
- }
- return true;
-}
-
-// Find a shortcut entry that has specified target and return its position.
-int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
- const int *const targetCodePointsToFind, const int codePointCount) const {
- bool hasNext = true;
- int readingPos = shortcutListPos;
- int targetCodePoints[MAX_WORD_LENGTH];
- while (hasNext) {
- const int entryPos = readingPos;
- int probability = 0;
- int targetCodePointCount = 0;
- getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount,
- &probability, &hasNext, &readingPos);
- if (targetCodePointCount != codePointCount) {
- continue;
- }
- bool matched = true;
- for (int i = 0; i < codePointCount; ++i) {
- if (targetCodePointsToFind[i] != targetCodePoints[i]) {
- matched = false;
- break;
- }
- }
- if (matched) {
- return entryPos;
- }
- }
- return NOT_A_DICT_POS;
-}
-
-int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
- const bool hasNext) const {
- return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK)
- | (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
deleted file mode 100644
index eaafc27bc..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
-#define LATINIME_SHORTCUT_DICT_CONTENT_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-
-namespace latinime {
-
-class ShortcutDictContent : public SparseTableDictContent {
- public:
- ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
- : SparseTableDictContent(dictPath,
- Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
- Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
- Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
- Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
-
- ShortcutDictContent()
- : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
-
- void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
- int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
- const int shortcutEntryPos) {
- int readingPos = shortcutEntryPos;
- return getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
- outCodePointCount, outProbability, outhasNext, &readingPos);
- }
-
- void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
- int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
- bool *const outhasNext, int *const shortcutEntryPos) const;
-
- // Returns head position of shortcut list for a PtNode specified by terminalId.
- int getShortcutListHeadPos(const int terminalId) const;
-
- bool flushToFile(const char *const dictPath) const;
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ShortcutDictContent *const originalShortcutDictContent);
-
- bool createNewShortcutList(const int terminalId);
-
- bool copyShortcutList(const int shortcutListPos, const int toPos);
-
- bool setProbability(const int probability, const int shortcutEntryPos);
-
- bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
- const int probability, const bool hasNext, const int shortcutEntryPos) {
- int writingPos = shortcutEntryPos;
- return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
- hasNext, &writingPos);
- }
-
- bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
- const int codePointCount, const int probability, const bool hasNext,
- int *const shortcutEntryPos);
-
- int findShortcutEntryAndGetPos(const int shortcutListPos,
- const int *const targetCodePointsToFind, const int codePointCount) const;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
-
- bool copyShortcutListFromDictContent(const int shortcutListPos,
- const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
-
- int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
-};
-} // namespace latinime
-#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
deleted file mode 100644
index 9064b7e72..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_SINGLE_DICT_CONTENT_H
-#define LATINIME_SINGLE_DICT_CONTENT_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-class SingleDictContent : public DictContent {
- public:
- SingleDictContent(const char *const dictPath, const char *const contentFileName,
- const bool isUpdatable)
- : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
- mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
- mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mIsValid(mMmappedBuffer.get() != 0) {}
-
- SingleDictContent()
- : mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
- mIsValid(true) {}
-
- virtual ~SingleDictContent() {}
-
- virtual bool isValid() const {
- return mIsValid;
- }
-
- bool isNearSizeLimit() const {
- return mExpandableContentBuffer.isNearSizeLimit();
- }
-
- protected:
- BufferWithExtendableBuffer *getWritableBuffer() {
- return &mExpandableContentBuffer;
- }
-
- const BufferWithExtendableBuffer *getBuffer() const {
- return &mExpandableContentBuffer;
- }
-
- bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
- return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
- contentFileNameSuffix, &mExpandableContentBuffer);
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
-
- const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
- BufferWithExtendableBuffer mExpandableContentBuffer;
- const bool mIsValid;
-};
-} // namespace latinime
-#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
deleted file mode 100644
index 63c6ea3a4..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
-
-namespace latinime {
-
-bool SparseTableDictContent::flush(const char *const dictPath,
- const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
- const char *const contentFileNameSuffix) const {
- if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix,
- &mExpandableLookupTableBuffer)){
- return false;
- }
- if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix,
- &mExpandableAddressTableBuffer)) {
- return false;
- }
- if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix,
- &mExpandableContentBuffer)) {
- return false;
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
deleted file mode 100644
index a82e3f50a..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
-#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
-
-namespace latinime {
-
-// TODO: Support multiple contents.
-class SparseTableDictContent : public DictContent {
- public:
- AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
- const char *const lookupTableFileName, const char *const addressTableFileName,
- const char *const contentFileName, const bool isUpdatable,
- const int sparseTableBlockSize, const int sparseTableDataSize)
- : mLookupTableBuffer(
- MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
- mAddressTableBuffer(
- MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
- mContentBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
- mExpandableLookupTableBuffer(
- mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBuffer() : 0,
- mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBufferSize() : 0,
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableAddressTableBuffer(
- mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBuffer() : 0,
- mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBufferSize() : 0,
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
- mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
- sparseTableBlockSize, sparseTableDataSize),
- mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
- && mContentBuffer.get() != 0) {}
-
- SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
- : mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0),
- mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
- mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
- mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
- mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
- sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
-
- virtual ~SparseTableDictContent() {}
-
- virtual bool isValid() const {
- return mIsValid;
- }
-
- bool isNearSizeLimit() const {
- return mExpandableLookupTableBuffer.isNearSizeLimit()
- || mExpandableAddressTableBuffer.isNearSizeLimit()
- || mExpandableContentBuffer.isNearSizeLimit();
- }
-
- protected:
- SparseTable *getUpdatableAddressLookupTable() {
- return &mAddressLookupTable;
- }
-
- const SparseTable *getAddressLookupTable() const {
- return &mAddressLookupTable;
- }
-
- BufferWithExtendableBuffer *getWritableContentBuffer() {
- return &mExpandableContentBuffer;
- }
-
- const BufferWithExtendableBuffer *getContentBuffer() const {
- return &mExpandableContentBuffer;
- }
-
- bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
- const char *const addressTableFileName, const char *const contentFileName) const;
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
-
- const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
- const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
- const MmappedBuffer::MmappedBufferPtr mContentBuffer;
- BufferWithExtendableBuffer mExpandableLookupTableBuffer;
- BufferWithExtendableBuffer mExpandableAddressTableBuffer;
- BufferWithExtendableBuffer mExpandableContentBuffer;
- SparseTable mAddressLookupTable;
- const bool mIsValid;
-};
-} // namespace latinime
-#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
deleted file mode 100644
index 0b17a009d..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
- if (terminalId < 0 || terminalId >= mSize) {
- return NOT_A_DICT_POS;
- }
- const int terminalPos = getBuffer()->readUint(
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
- return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
- NOT_A_DICT_POS : terminalPos;
-}
-
-bool TerminalPositionLookupTable::setTerminalPtNodePosition(
- const int terminalId, const int terminalPtNodePos) {
- if (terminalId < 0) {
- return NOT_A_DICT_POS;
- }
- while (terminalId >= mSize) {
- // Write new entry.
- if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
- return false;
- }
- mSize++;
- }
- const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
- terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
- return getWritableBuffer()->writeUint(terminalPos,
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
-}
-
-bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
- // If the used buffer size is smaller than the actual buffer size, regenerate the lookup
- // table and write the new table to the file.
- if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
- TerminalPositionLookupTable lookupTableToWrite;
- for (int i = 0; i < mSize; ++i) {
- const int terminalPtNodePosition = getTerminalPtNodePosition(i);
- if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
- AKLOGE("Cannot set terminal position to lookupTableToWrite."
- " terminalId: %d, position: %d", i, terminalPtNodePosition);
- return false;
- }
- }
- return lookupTableToWrite.flush(dictPath,
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
- } else {
- // We can simply use this lookup table because the buffer size has not been
- // changed.
- return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
- }
-}
-
-bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
- int removedEntryCount = 0;
- int nextNewTerminalId = 0;
- for (int i = 0; i < mSize; ++i) {
- const int terminalPos = getBuffer()->readUint(
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
- if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
- // This entry is a garbage.
- removedEntryCount++;
- } else {
- // Give a new terminal id to the entry.
- if (!getWritableBuffer()->writeUint(terminalPos,
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
- getEntryPos(nextNewTerminalId))) {
- return false;
- }
- // Memorize the mapping to the old terminal id to the new terminal id.
- terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
- nextNewTerminalId++;
- }
- }
- mSize = nextNewTerminalId;
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
deleted file mode 100644
index f73e22754..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
-#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "utils/hash_map_compat.h"
-
-namespace latinime {
-
-class TerminalPositionLookupTable : public SingleDictContent {
- public:
- typedef hash_map_compat<int, int> TerminalIdMap;
-
- TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
- : SingleDictContent(dictPath,
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
- mSize(getBuffer()->getTailPosition()
- / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
-
- TerminalPositionLookupTable() : mSize(0) {}
-
- int getTerminalPtNodePosition(const int terminalId) const;
-
- bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
-
- int getNextTerminalId() const {
- return mSize;
- }
-
- bool flushToFile(const char *const dictPath) const;
-
- bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
-
- int getEntryPos(const int terminalId) const {
- return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
- }
-
- int mSize;
-};
-} // namespace latinime
-#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
deleted file mode 100644
index 918c02ba2..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
-
-#include <cerrno>
-#include <cstring>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/file_utils.h"
-
-namespace latinime {
-
-/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
- const char *const dictPath, const MmappedBuffer::MmappedBufferPtr &headerBuffer) {
- const bool isUpdatable = headerBuffer.get() ? headerBuffer.get()->isUpdatable() : false;
- // TODO: take only dictDirPath, and open both header and trie files in the constructor below
- return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, headerBuffer, isUpdatable));
-}
-
-bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
- const BufferWithExtendableBuffer *const headerBuffer) const {
- // Create temporary directory.
- const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
- DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
- char tmpDirPath[tmpDirPathBufSize];
- FileUtils::getFilePathWithSuffix(dictDirPath,
- DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
- tmpDirPath);
- if (FileUtils::existsDir(tmpDirPath)) {
- if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
- AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
- ASSERT(false);
- return false;
- }
- }
- if (mkdir(tmpDirPath, S_IRWXU) == -1) {
- AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
- return false;
- }
- // Get dictionary base path.
- const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
- char dictName[dictNameBufSize];
- FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
- const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
- char dictPath[dictPathBufSize];
- FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
-
- // Write header file.
- if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
- Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
- AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
- Ver4DictConstants::HEADER_FILE_EXTENSION);
- return false;
- }
- // Write trie file.
- if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
- Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
- AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath,
- Ver4DictConstants::TRIE_FILE_EXTENSION);
- return false;
- }
- // Write dictionary contents.
- if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
- AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
- return false;
- }
- if (!mProbabilityDictContent.flushToFile(dictPath)) {
- AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
- return false;
- }
- if (!mBigramDictContent.flushToFile(dictPath)) {
- AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
- return false;
- }
- if (!mShortcutDictContent.flushToFile(dictPath)) {
- AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
- return false;
- }
- // Remove existing dictionary.
- if (!FileUtils::removeDirAndFiles(dictDirPath)) {
- AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
- ASSERT(false);
- return false;
- }
- // Rename temporary directory.
- if (rename(tmpDirPath, dictDirPath) != 0) {
- AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
- ASSERT(false);
- return false;
- }
- return true;
-}
-
-Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
- const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable)
- : mHeaderBuffer(headerBuffer),
- mDictBuffer(MmappedBuffer::openBuffer(dictPath,
- Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
- mHeaderPolicy(headerBuffer.get()->getBuffer(), FormatUtils::VERSION_4),
- mExpandableHeaderBuffer(headerBuffer.get()->getBuffer(), mHeaderPolicy.getSize(),
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableTrieBuffer(mDictBuffer.get()->getBuffer(),
- mDictBuffer.get()->getBufferSize(),
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mTerminalPositionLookupTable(dictPath, isUpdatable),
- mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
- isUpdatable),
- mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
- isUpdatable),
- mShortcutDictContent(dictPath, isUpdatable),
- mIsUpdatable(isUpdatable) {}
-
-Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy)
- : mHeaderBuffer(0), mDictBuffer(0), mHeaderPolicy(),
- mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
- mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
- mTerminalPositionLookupTable(),
- mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
- mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
- mIsUpdatable(true) {}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
deleted file mode 100644
index a0c219e4d..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_DICT_BUFFER_H
-#define LATINIME_VER4_DICT_BUFFER_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-namespace latinime {
-
-class Ver4DictBuffers {
- public:
- typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
-
- static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
- const MmappedBuffer::MmappedBufferPtr &headerBuffer);
-
- static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
- const HeaderPolicy *const headerPolicy) {
- return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy));
- }
-
- AK_FORCE_INLINE bool isValid() const {
- return mDictBuffer.get() != 0 && mHeaderPolicy.isValid()
- && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
- && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
- }
-
- AK_FORCE_INLINE bool isNearSizeLimit() const {
- return mExpandableTrieBuffer.isNearSizeLimit()
- || mTerminalPositionLookupTable.isNearSizeLimit()
- || mProbabilityDictContent.isNearSizeLimit()
- || mBigramDictContent.isNearSizeLimit()
- || mShortcutDictContent.isNearSizeLimit();
- }
-
- AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
- return &mHeaderPolicy;
- }
-
- AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
- return &mExpandableHeaderBuffer;
- }
-
- AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
- return &mExpandableTrieBuffer;
- }
-
- AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
- return &mExpandableTrieBuffer;
- }
-
- AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
- return &mTerminalPositionLookupTable;
- }
-
- AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
- return &mTerminalPositionLookupTable;
- }
-
- AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
- return &mProbabilityDictContent;
- }
-
- AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
- return &mProbabilityDictContent;
- }
-
- AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
- return &mBigramDictContent;
- }
-
- AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
- return &mBigramDictContent;
- }
-
- AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
- return &mShortcutDictContent;
- }
-
- AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
- return &mShortcutDictContent;
- }
-
- AK_FORCE_INLINE bool isUpdatable() const {
- return mIsUpdatable;
- }
-
- bool flush(const char *const dictDirPath) const {
- return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
- }
-
- bool flushHeaderAndDictBuffers(const char *const dictDirPath,
- const BufferWithExtendableBuffer *const headerBuffer) const;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
-
- Ver4DictBuffers(const char *const dictDirPath,
- const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable);
-
- Ver4DictBuffers(const HeaderPolicy *const headerPolicy);
-
- const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
- const MmappedBuffer::MmappedBufferPtr mDictBuffer;
- const HeaderPolicy mHeaderPolicy;
- BufferWithExtendableBuffer mExpandableHeaderBuffer;
- BufferWithExtendableBuffer mExpandableTrieBuffer;
- TerminalPositionLookupTable mTerminalPositionLookupTable;
- ProbabilityDictContent mProbabilityDictContent;
- BigramDictContent mBigramDictContent;
- ShortcutDictContent mShortcutDictContent;
- const int mIsUpdatable;
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
deleted file mode 100644
index 34fecc25f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-
-namespace latinime {
-
-// These values MUST match the definitions in FormatSpec.java.
-const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
-const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
-const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
-// tat = Terminal Address Table
-const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
-const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
-const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
-const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
-const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
-const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
-const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
- ".shortcut_index_shortcut";
-
-// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
-// TODO: Make MAX_DICTIONARY_SIZE 8MB.
-const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
-// Extended region size, which is not GCed region size in dict file + additional buffer size, is
-// limited to 1MB to prevent from inefficient traversing.
-const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
-
-const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
-const int Ver4DictConstants::PROBABILITY_SIZE = 1;
-const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
-const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
-const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
-const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
-const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
-const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
-const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
-
-const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
-const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
-const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
-const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
-
-const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
-// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
-// invalid terminal ID in bigram lists.
-const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
- (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
-const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
-const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
-const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
-const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
-
-const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
-const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
-const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
deleted file mode 100644
index d6d22c5c1..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_DICT_CONSTANTS_H
-#define LATINIME_VER4_DICT_CONSTANTS_H
-
-#include "defines.h"
-
-namespace latinime {
-
-// TODO: Create PtConstants under the pt_common and move some constant values there.
-// Note that there are corresponding definitions in FormatSpec.java.
-class Ver4DictConstants {
- public:
- static const char *const TRIE_FILE_EXTENSION;
- static const char *const HEADER_FILE_EXTENSION;
- static const char *const FREQ_FILE_EXTENSION;
- static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
- static const char *const BIGRAM_FILE_EXTENSION;
- static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
- static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
- static const char *const SHORTCUT_FILE_EXTENSION;
- static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
- static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
-
- static const int MAX_DICTIONARY_SIZE;
- static const int MAX_DICT_EXTENDED_REGION_SIZE;
-
- static const int NOT_A_TERMINAL_ID;
- static const int PROBABILITY_SIZE;
- static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
- static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
- static const int NOT_A_TERMINAL_ADDRESS;
- static const int TERMINAL_ID_FIELD_SIZE;
- static const int TIME_STAMP_FIELD_SIZE;
- static const int WORD_LEVEL_FIELD_SIZE;
- static const int WORD_COUNT_FIELD_SIZE;
-
- static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
- static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
- static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
- static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
-
- static const int BIGRAM_FLAGS_FIELD_SIZE;
- static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
- static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
- static const int BIGRAM_PROBABILITY_MASK;
- static const int BIGRAM_HAS_NEXT_MASK;
- // Used when bigram list has time stamp.
- static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
-
- static const int SHORTCUT_FLAGS_FIELD_SIZE;
- static const int SHORTCUT_PROBABILITY_MASK;
- static const int SHORTCUT_HAS_NEXT_MASK;
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
deleted file mode 100644
index 17fc9483b..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
-
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
- const int ptNodePos, const int siblingNodePos) const {
- if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
- // Reading invalid position because of bug or broken dictionary.
- AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
- ptNodePos, mBuffer->getTailPosition());
- ASSERT(false);
- return PtNodeParams();
- }
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
- const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
- int pos = ptNodePos;
- const int headPos = ptNodePos;
- if (usesAdditionalBuffer) {
- pos -= mBuffer->getOriginalBufferSize();
- }
- const PatriciaTrieReadingUtils::NodeFlags flags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPosOffset =
- DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
- dictBuf, &pos);
- const int parentPos =
- DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
- int codePoints[MAX_WORD_LENGTH];
- const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
- int terminalIdFieldPos = NOT_A_DICT_POS;
- int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- int probability = NOT_A_PROBABILITY;
- if (PatriciaTrieReadingUtils::isTerminal(flags)) {
- terminalIdFieldPos = pos;
- if (usesAdditionalBuffer) {
- terminalIdFieldPos += mBuffer->getOriginalBufferSize();
- }
- terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
- const ProbabilityEntry probabilityEntry =
- mProbabilityDictContent->getProbabilityEntry(terminalId);
- if (probabilityEntry.hasHistoricalInfo()) {
- probability = ForgettingCurveUtils::decodeProbability(
- probabilityEntry.getHistoricalInfo());
- } else {
- probability = probabilityEntry.getProbability();
- }
- }
- int childrenPosFieldPos = pos;
- if (usesAdditionalBuffer) {
- childrenPosFieldPos += mBuffer->getOriginalBufferSize();
- }
- int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, &pos);
- if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
- childrenPos += mBuffer->getOriginalBufferSize();
- }
- if (usesAdditionalBuffer) {
- pos += mBuffer->getOriginalBufferSize();
- }
- // Sibling position is the tail position of original PtNode.
- int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos;
- // Read destination node if the read node is a moved node.
- if (DynamicPtReadingUtils::isMoved(flags)) {
- // The destination position is stored at the same place as the parent position.
- return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
- } else {
- return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
- terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
- newSiblingNodePos);
- }
-}
-
-}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
deleted file mode 100644
index 9d932457c..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
-#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class ProbabilityDictContent;
-
-/*
- * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
- * node and reads node attributes including probability form probabilityBuffer.
- */
-class Ver4PatriciaTrieNodeReader : public PtNodeReader {
- public:
- Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const ProbabilityDictContent *const probabilityDictContent)
- : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
-
- ~Ver4PatriciaTrieNodeReader() {}
-
- virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
- return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
- NOT_A_DICT_POS /* siblingNodePos */);
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
-
- const BufferWithExtendableBuffer *const mBuffer;
- const ProbabilityDictContent *const mProbabilityDictContent;
-
- const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
- const int siblingNodePos) const;
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
deleted file mode 100644
index 32576cf0a..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
-
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
-
-bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
- const PtNodeParams *const toBeUpdatedPtNodeParams) {
- int pos = toBeUpdatedPtNodeParams->getHeadPos();
- const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mTrieBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
- true /* isDeleted */, false /* willBecomeNonTerminal */);
- int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
- // Update flags.
- if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
- &writingPos)) {
- return false;
- }
- if (toBeUpdatedPtNodeParams->isTerminal()) {
- // The PtNode is a terminal. Delete entry from the terminal position lookup table.
- return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
- toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
- } else {
- return true;
- }
-}
-
-bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
- const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int movedPos, const int bigramLinkedNodePos) {
- int pos = toBeUpdatedPtNodeParams->getHeadPos();
- const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mTrieBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
- false /* isDeleted */, false /* willBecomeNonTerminal */);
- int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
- // Update flags.
- if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
- &writingPos)) {
- return false;
- }
- // Update moved position, which is stored in the parent offset field.
- if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
- return false;
- }
- if (toBeUpdatedPtNodeParams->hasChildren()) {
- // Update children's parent position.
- mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
- while (!mReadingHelper.isEnd()) {
- const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
- int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
- + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
- if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
- mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
- &parentOffsetFieldPos)) {
- // Parent offset cannot be written because of a bug or a broken dictionary; thus,
- // we give up to update dictionary.
- return false;
- }
- mReadingHelper.readNextSiblingNode(childPtNodeParams);
- }
- }
- return true;
-}
-
-bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
- const PtNodeParams *const toBeUpdatedPtNodeParams) {
- int pos = toBeUpdatedPtNodeParams->getHeadPos();
- const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
- const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- pos -= mTrieBuffer->getOriginalBufferSize();
- }
- // Read original flags
- const PatriciaTrieReadingUtils::NodeFlags originalFlags =
- PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
- DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
- false /* isDeleted */, true /* willBecomeNonTerminal */);
- if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
- toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) {
- AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
- toBeUpdatedPtNodeParams->getTerminalId());
- return false;
- }
- // Update flags.
- int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
- return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
- &writingPos);
-}
-
-bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
- const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
- const int timestamp) {
- if (!toBeUpdatedPtNodeParams->isTerminal()) {
- return false;
- }
- const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId());
- const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
- newProbability, timestamp);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
-}
-
-bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
- const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
- if (!toBeUpdatedPtNodeParams->isTerminal()) {
- AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
- return false;
- }
- const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId());
- if (originalProbabilityEntry.hasHistoricalInfo()) {
- const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
- originalProbabilityEntry.getHistoricalInfo());
- const ProbabilityEntry probabilityEntry =
- originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
- if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
- AKLOGE("Cannot write updated probability entry. terminalId: %d",
- toBeUpdatedPtNodeParams->getTerminalId());
- return false;
- }
- const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo);
- if (!isValid) {
- if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
- AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
- return false;
- }
- }
- *outNeedsToKeepPtNode = isValid;
- } else {
- // No need to update probability.
- *outNeedsToKeepPtNode = true;
- }
- return true;
-}
-
-bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
- const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
- int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
- return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
- newChildrenPosition, &childrenPosFieldPos);
-}
-
-bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int newTerminalId) {
- return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
- toBeUpdatedPtNodeParams->getTerminalIdFieldPos());
-}
-
-bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
- const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
- return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
- ptNodeWritingPos);
-}
-
-
-bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
- const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
- int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
- ptNodeWritingPos)) {
- return false;
- }
- // Write probability.
- ProbabilityEntry newProbabilityEntry;
- const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
- &newProbabilityEntry, ptNodeParams->getProbability(), timestamp);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
- &probabilityEntryToWrite);
-}
-
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
- bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) {
- AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
- sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
- return false;
- }
- if (!sourcePtNodeParams->hasBigrams()) {
- // Update has bigrams flag.
- return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
- sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
- sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
- true /* hasBigrams */,
- sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
- }
- return true;
-}
-
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
-}
-
-bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
- const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
- return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
- sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
-}
-
-bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
- const PtNodeParams *const toBeUpdatedPtNodeParams,
- const DictPositionRelocationMap *const dictPositionRelocationMap,
- int *const outBigramEntryCount) {
- int parentPos = toBeUpdatedPtNodeParams->getParentPos();
- if (parentPos != NOT_A_DICT_POS) {
- PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
- dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
- if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
- parentPos = it->second;
- }
- }
- int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
- + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
- // Write updated parent offset.
- if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
- parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
- return false;
- }
-
- // Updates children position.
- int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
- if (childrenPos != NOT_A_DICT_POS) {
- PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
- dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
- if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
- childrenPos = it->second;
- }
- }
- if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
- return false;
- }
-
- // Counts bigram entries.
- if (outBigramEntryCount) {
- *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
- toBeUpdatedPtNodeParams->getTerminalId());
- }
- return true;
-}
-
-bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
- const int *const targetCodePoints, const int targetCodePointCount,
- const int shortcutProbability) {
- if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
- targetCodePoints, targetCodePointCount, shortcutProbability)) {
- AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
- return false;
- }
- if (!ptNodeParams->hasShortcutTargets()) {
- // Update has shortcut targets flag.
- return updatePtNodeFlags(ptNodeParams->getHeadPos(),
- ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
- ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
- ptNodeParams->hasBigrams(),
- ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
- }
- return true;
-}
-
-bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
- const PtNodeParams *const ptNodeParams) {
- const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos(
- ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
- const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
- ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
- return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
- ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
- hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
-}
-
-bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
- const PtNodeParams *const ptNodeParams, int *const outTerminalId,
- int *const ptNodeWritingPos) {
- const int nodePos = *ptNodeWritingPos;
- // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
- // PtNode writing.
- if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
- 0 /* nodeFlags */, ptNodeWritingPos)) {
- return false;
- }
- // Calculate a parent offset and write the offset.
- if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
- ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
- return false;
- }
- // Write code points
- if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
- ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
- return false;
- }
- int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- if (!ptNodeParams->willBecomeNonTerminal()) {
- if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
- terminalId = ptNodeParams->getTerminalId();
- } else if (ptNodeParams->isTerminal()) {
- // Write terminal information using a new terminal id.
- // Get a new unused terminal id.
- terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
- }
- }
- const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
- if (isTerminal) {
- // Update the lookup table.
- if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
- terminalId, nodePos)) {
- return false;
- }
- // Write terminal Id.
- if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
- Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
- return false;
- }
- if (outTerminalId) {
- *outTerminalId = terminalId;
- }
- }
- // Write children position
- if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
- ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
- return false;
- }
- return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
- isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
- ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
-}
-
-const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
- const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
- const int timestamp) const {
- // TODO: Consolidate historical info and probability.
- if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
- const HistoricalInfo updatedHistoricalInfo =
- ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
- return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
- &updatedHistoricalInfo);
- } else {
- return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
- }
-}
-
-bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
- const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
- const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
- // Create node flags and write them.
- PatriciaTrieReadingUtils::NodeFlags nodeFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
- hasShortcutTargets, hasBigrams, hasMultipleChars,
- CHILDREN_POSITION_FIELD_SIZE);
- if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
- AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
- return false;
- }
- return true;
-}
-
-}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
deleted file mode 100644
index 69576d8e5..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
-#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-class Ver4BigramListPolicy;
-class Ver4DictBuffers;
-class Ver4ShortcutListPolicy;
-
-/*
- * This class is used for helping to writes nodes of ver4 patricia trie.
- */
-class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
- public:
- Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
- Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
- Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
- : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
- mReadingHelper(mTrieBuffer, mPtNodeReader),
- mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
-
- virtual ~Ver4PatriciaTrieNodeWriter() {}
-
- virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
-
- virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int movedPos, const int bigramLinkedNodePos);
-
- virtual bool markPtNodeAsWillBecomeNonTerminal(
- const PtNodeParams *const toBeUpdatedPtNodeParams);
-
- virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int newProbability, const int timestamp);
-
- virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
- const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
-
- virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int newChildrenPosition);
-
- bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const int newTerminalId);
-
- virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
- int *const ptNodeWritingPos);
-
- virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
- const int timestamp, int *const ptNodeWritingPos);
-
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
- bool *const outAddedNewBigram);
-
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
-
- virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
- const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
-
- virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
- const DictPositionRelocationMap *const dictPositionRelocationMap,
- int *const outBigramEntryCount);
-
- virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
- const int *const targetCodePoints, const int targetCodePointCount,
- const int shortcutProbability);
-
- bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
-
- bool writePtNodeAndGetTerminalIdAndAdvancePosition(
- const PtNodeParams *const ptNodeParams, int *const outTerminalId,
- int *const ptNodeWritingPos);
-
- // Create updated probability entry using given probability and timestamp. In addition to the
- // probability, this method updates historical information if needed.
- const ProbabilityEntry createUpdatedEntryFrom(
- const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
- const int timestamp) const;
-
- bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
- const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
- const bool hasMultipleChars);
-
- static const int CHILDREN_POSITION_FIELD_SIZE;
-
- BufferWithExtendableBuffer *const mTrieBuffer;
- Ver4DictBuffers *const mBuffers;
- const Ver4PatriciaTrieNodeReader *const mPtNodeReader;
- DynamicPtReadingHelper mReadingHelper;
- Ver4BigramListPolicy *const mBigramPolicy;
- Ver4ShortcutListPolicy *const mShortcutPolicy;
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
deleted file mode 100644
index 96bb8128e..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
-
-#include <vector>
-
-#include "suggest/core/dicnode/dic_node.h"
-#include "suggest/core/dicnode/dic_node_vector.h"
-#include "suggest/core/dictionary/unigram_property.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
-
-namespace latinime {
-
-// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
-// BinaryDictionaryDecayingTests.
-const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
-const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
-const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
-const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
-const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT =
- "SET_CURRENT_TIME_FOR_TESTING:%d";
-const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
-const char *const Ver4PatriciaTriePolicy::QUIT_TIMEKEEPER_TEST_MODE_QUERY =
- "QUIT_TIMEKEEPER_TEST_MODE";
-const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
-const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
- Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
-
-void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
- DicNodeVector *const childDicNodes) const {
- if (!dicNode->hasChildren()) {
- return;
- }
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
- readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
- while (!readingHelper.isEnd()) {
- const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
- if (!ptNodeParams.isValid()) {
- break;
- }
- bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
- if (isTerminal && mHeaderPolicy->isDecayingDict()) {
- // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
- // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
- // valid terminal DicNode.
- isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
- }
- childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
- ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
- ptNodeParams.hasChildren(),
- ptNodeParams.isBlacklisted()
- || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
- ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
- readingHelper.readNextSiblingNode(ptNodeParams);
- }
-}
-
-int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
- const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const {
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
- readingHelper.initWithPtNodePos(ptNodePos);
- return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
- maxCodePointCount, outCodePoints, outUnigramProbability);
-}
-
-int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const {
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
- readingHelper.initWithPtNodeArrayPos(getRootPosition());
- return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
-}
-
-int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
- const int bigramProbability) const {
- if (mHeaderPolicy->isDecayingDict()) {
- // Both probabilities are encoded. Decode them and get probability.
- return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
- } else {
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return ProbabilityUtils::backoff(unigramProbability);
- } else {
- // bigramProbability is a bigram probability delta.
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
- bigramProbability);
- }
- }
-}
-
-int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_PROBABILITY;
- }
- const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
- return NOT_A_PROBABILITY;
- }
- return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
-}
-
-int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
- ptNodeParams.getTerminalId());
-}
-
-int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos(
- ptNodeParams.getTerminalId());
-}
-
-bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
- const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
- const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
- const int timestamp) {
- if (!mBuffers.get()->isUpdatable()) {
- AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
- return false;
- }
- if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
- mDictBuffer->getTailPosition());
- return false;
- }
- DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
- readingHelper.initWithPtNodeArrayPos(getRootPosition());
- bool addedNewUnigram = false;
- if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
- isBlacklisted, timestamp, &addedNewUnigram)) {
- if (addedNewUnigram) {
- mUnigramCount++;
- }
- if (shortcutLength > 0) {
- // Add shortcut target.
- const int wordPos = getTerminalPtNodePositionOfWord(word, length,
- false /* forceLowerCaseSearch */);
- if (wordPos == NOT_A_DICT_POS) {
- AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
- return false;
- }
- if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints,
- shortcutLength, shortcutProbability)) {
- AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d",
- wordPos, shortcutLength, shortcutProbability);
- return false;
- }
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1, const int probability,
- const int timestamp) {
- if (!mBuffers.get()->isUpdatable()) {
- AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
- return false;
- }
- if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
- mDictBuffer->getTailPosition());
- return false;
- }
- const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
- return false;
- }
- const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
- return false;
- }
- bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
- &addedNewBigram)) {
- if (addedNewBigram) {
- mBigramCount++;
- }
- return true;
- } else {
- return false;
- }
-}
-
-bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1) {
- if (!mBuffers.get()->isUpdatable()) {
- AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
- return false;
- }
- if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
- AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
- mDictBuffer->getTailPosition());
- return false;
- }
- const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
- false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_DICT_POS) {
- return false;
- }
- const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_DICT_POS) {
- return false;
- }
- if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
- mBigramCount--;
- return true;
- } else {
- return false;
- }
-}
-
-void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
- if (!mBuffers.get()->isUpdatable()) {
- AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
- return;
- }
- mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
-}
-
-void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
- if (!mBuffers.get()->isUpdatable()) {
- AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
- return;
- }
- mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
-}
-
-bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
- if (!mBuffers.get()->isUpdatable()) {
- AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
- return false;
- }
- if (mBuffers.get()->isNearSizeLimit()) {
- // Additional buffer size is near the limit.
- return true;
- } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
- > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
- // Total extended region size of the trie exceeds the limit.
- return true;
- } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
- && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
- // Needs to reduce dictionary size.
- return true;
- } else if (mHeaderPolicy->isDecayingDict()) {
- return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
- mHeaderPolicy);
- }
- return false;
-}
-
-void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
- char *const outResult, const int maxResultLength) {
- const int compareLength = queryLength + 1 /* terminator */;
- int timestamp = NOT_A_TIMESTAMP;
- if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mUnigramCount);
- } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mBigramCount);
- } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d",
- mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
- static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
- } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d",
- mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
- static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
- } else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) {
- TimeKeeper::startTestModeWithForceCurrentTime(timestamp);
- } else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", TimeKeeper::peekCurrentTime());
- } else if (strncmp(query, QUIT_TIMEKEEPER_TEST_MODE_QUERY, compareLength) == 0) {
- TimeKeeper::stopTestMode();
- }
-}
-
-const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints,
- const int codePointCount) const {
- const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
- false /* forceLowerCaseSearch */);
- if (ptNodePos == NOT_A_DICT_POS) {
- AKLOGE("fetchUnigramProperty is called for invalid word.");
- return UnigramProperty();
- }
- const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- const ProbabilityEntry probabilityEntry =
- mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry(
- ptNodeParams.getTerminalId());
- const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
- // Fetch shortcut information.
- std::vector<std::vector<int> > shortcutTargets;
- std::vector<int> shortcutProbabilities;
- int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
- if (shortcutPos != NOT_A_DICT_POS) {
- int shortcutTarget[MAX_WORD_LENGTH];
- const ShortcutDictContent *const shortcutDictContent =
- mBuffers.get()->getShortcutDictContent();
- bool hasNext = true;
- while (hasNext) {
- int shortcutTargetLength = 0;
- int shortcutProbability = NOT_A_PROBABILITY;
- shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
- &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
- std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
- shortcutTargets.push_back(target);
- shortcutProbabilities.push_back(shortcutProbability);
- }
- }
- return UnigramProperty(ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
- ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
- ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
- historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount(), &shortcutTargets, &shortcutProbabilities);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
deleted file mode 100644
index 8187b7a39..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H
-#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H
-
-#include "defines.h"
-#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-class DicNode;
-class DicNodeVector;
-
-// TODO: Implement.
-class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
- public:
- Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
- : mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
- mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
- mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(),
- mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
- mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
- mBuffers.get()->getTerminalPositionLookupTable()),
- mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
- mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
- &mShortcutPolicy),
- mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
- mWritingHelper(mBuffers.get()),
- mUnigramCount(mHeaderPolicy->getUnigramCount()),
- mBigramCount(mHeaderPolicy->getBigramCount()) {};
-
- AK_FORCE_INLINE int getRootPosition() const {
- return 0;
- }
-
- void createAndGetAllChildDicNodes(const DicNode *const dicNode,
- DicNodeVector *const childDicNodes) const;
-
- int getCodePointsAndProbabilityAndReturnCodePointCount(
- const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
- int *const outUnigramProbability) const;
-
- int getTerminalPtNodePositionOfWord(const int *const inWord,
- const int length, const bool forceLowerCaseSearch) const;
-
- int getProbability(const int unigramProbability, const int bigramProbability) const;
-
- int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
-
- int getShortcutPositionOfPtNode(const int ptNodePos) const;
-
- int getBigramsPositionOfPtNode(const int ptNodePos) const;
-
- const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
- return mHeaderPolicy;
- }
-
- const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
- return &mBigramPolicy;
- }
-
- const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
- return &mShortcutPolicy;
- }
-
- bool addUnigramWord(const int *const word, const int length, const int probability,
- const int *const shortcutTargetCodePoints, const int shortcutLength,
- const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
- const int timestamp);
-
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp);
-
- bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1);
-
- void flush(const char *const filePath);
-
- void flushWithGC(const char *const filePath);
-
- bool needsToRunGC(const bool mindsBlockByGC) const;
-
- void getProperty(const char *const query, const int queryLength, char *const outResult,
- const int maxResultLength);
-
- const UnigramProperty getUnigramProperty(const int *const codePoints,
- const int codePointCount) const;
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
-
- static const char *const UNIGRAM_COUNT_QUERY;
- static const char *const BIGRAM_COUNT_QUERY;
- static const char *const MAX_UNIGRAM_COUNT_QUERY;
- static const char *const MAX_BIGRAM_COUNT_QUERY;
- static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT;
- static const char *const GET_CURRENT_TIME_QUERY;
- static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY;
- // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
- // prevent the dictionary from overflowing.
- static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
- static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
-
- Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
- const HeaderPolicy *const mHeaderPolicy;
- BufferWithExtendableBuffer *const mDictBuffer;
- Ver4BigramListPolicy mBigramPolicy;
- Ver4ShortcutListPolicy mShortcutPolicy;
- Ver4PatriciaTrieNodeReader mNodeReader;
- Ver4PatriciaTrieNodeWriter mNodeWriter;
- DynamicPtUpdatingHelper mUpdatingHelper;
- Ver4PatriciaTrieWritingHelper mWritingHelper;
- int mUnigramCount;
- int mBigramCount;
-};
-} // namespace latinime
-#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
deleted file mode 100644
index 254022db4..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
-
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
-
-namespace latinime {
-
-/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
- const uint8_t *const buffer, int *pos) {
- return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
deleted file mode 100644
index e418c4933..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
-#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H
-
-#include <stdint.h>
-
-#include "defines.h"
-
-namespace latinime {
-
-class BufferWithExtendableBuffer;
-
-class Ver4PatriciaTrieReadingUtils {
- public:
- static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
- int *const pos);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
-};
-} // namespace latinime
-#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
deleted file mode 100644
index 43227635c..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
-
-#include <cstring>
-#include <queue>
-
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/file_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
-namespace latinime {
-
-void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
- const int unigramCount, const int bigramCount) const {
- const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
- BufferWithExtendableBuffer headerBuffer(
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
- const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
- + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
- false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
- AKLOGE("Cannot write header structure to buffer. updatesLastUpdatedTime: %d, "
- "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
- "extendedRegionSize: %d", false, false, unigramCount, bigramCount,
- extendedRegionSize);
- return;
- }
- mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
-}
-
-void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
- const char *const dictDirPath) {
- const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
- Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
- Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
- int unigramCount = 0;
- int bigramCount = 0;
- if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
- return;
- }
- BufferWithExtendableBuffer headerBuffer(
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- true /* updatesLastDecayedTime */, unigramCount, bigramCount,
- 0 /* extendedRegionSize */)) {
- return;
- }
- dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
-}
-
-bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
- int *const outUnigramCount, int *const outBigramCount) {
- Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
- mBuffers->getProbabilityDictContent());
- Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
- mBuffers->getTerminalPositionLookupTable(), headerPolicy);
- Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
- mBuffers->getTerminalPositionLookupTable());
- Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
- mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
-
- DynamicPtReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPtGcEventListeners
- ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- &ptNodeWriter);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
- return false;
- }
- const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount();
- if (headerPolicy->isDecayingDict()
- && unigramCount > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
- if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter,
- ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC)) {
- AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
- ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC);
- return false;
- }
- }
-
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
- traversePolicyToUpdateBigramProbability(&ptNodeWriter);
- if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateBigramProbability)) {
- return false;
- }
- const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
- if (headerPolicy->isDecayingDict()
- && bigramCount > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
- if (!truncateBigrams(ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC)) {
- AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount,
- ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC);
- return false;
- }
- }
-
- // Mapping from positions in mBuffer to positions in bufferToWrite.
- PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
- readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
- DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
- traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
- buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
- if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
- return false;
- }
-
- // Create policy instances for the GCed dictionary.
- Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
- buffersToWrite->getProbabilityDictContent());
- Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
- buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
- Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
- buffersToWrite->getTerminalPositionLookupTable());
- Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
- // Re-assign terminal IDs for valid terminal PtNodes.
- TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
- if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
- &terminalIdMap)) {
- return false;
- }
- // Run GC for probability dict content.
- if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
- mBuffers->getProbabilityDictContent())) {
- return false;
- }
- // Run GC for bigram dict content.
- if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
- mBuffers->getBigramDictContent(), outBigramCount)) {
- return false;
- }
- // Run GC for shortcut dict content.
- if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
- mBuffers->getShortcutDictContent())) {
- return false;
- }
- DynamicPtReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
- &newPtNodeReader);
- newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
- traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
- if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
- &traversePolicyToUpdateAllPositionFields)) {
- return false;
- }
- newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
- traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
- if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
- return false;
- }
- *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
- return true;
-}
-
-bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
- const Ver4PatriciaTrieNodeReader *const ptNodeReader,
- Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
- const TerminalPositionLookupTable *const terminalPosLookupTable =
- mBuffers->getTerminalPositionLookupTable();
- const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
- std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
- priorityQueue;
- for (int i = 0; i < nextTerminalId; ++i) {
- const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
- if (terminalPos == NOT_A_DICT_POS) {
- continue;
- }
- const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
- const int probability = probabilityEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo()) :
- probabilityEntry.getProbability();
- priorityQueue.push(DictProbability(terminalPos, probability,
- probabilityEntry.getHistoricalInfo()->getTimeStamp()));
- }
-
- // Delete unigrams.
- while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
- const int ptNodePos = priorityQueue.top().getDictPos();
- const PtNodeParams ptNodeParams =
- ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
- if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
- AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
- return false;
- }
- priorityQueue.pop();
- }
- return true;
-}
-
-bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
- const TerminalPositionLookupTable *const terminalPosLookupTable =
- mBuffers->getTerminalPositionLookupTable();
- const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
- std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
- priorityQueue;
- BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
- for (int i = 0; i < nextTerminalId; ++i) {
- const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
- if (bigramListPos == NOT_A_DICT_POS) {
- continue;
- }
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const int entryPos = readingPos;
- const BigramEntry bigramEntry =
- bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (!bigramEntry.isValid()) {
- continue;
- }
- const int probability = bigramEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
- bigramEntry.getProbability();
- priorityQueue.push(DictProbability(entryPos, probability,
- bigramEntry.getHistoricalInfo()->getTimeStamp()));
- }
- }
-
- // Delete bigrams.
- while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
- const int entryPos = priorityQueue.top().getDictPos();
- const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
- const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
- if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
- AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
- return false;
- }
- priorityQueue.pop();
- }
- return true;
-}
-
-bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
- ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
- if (!ptNodeParams->isTerminal()) {
- return true;
- }
- TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
- mTerminalIdMap->find(ptNodeParams->getTerminalId());
- if (it == mTerminalIdMap->end()) {
- AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
- ptNodeParams->getTerminalId(), mTerminalIdMap->size());
- return false;
- }
- if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
- AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
- }
- return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
deleted file mode 100644
index c3a155e0e..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
-#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-
-namespace latinime {
-
-class HeaderPolicy;
-class Ver4DictBuffers;
-class Ver4PatriciaTrieNodeReader;
-class Ver4PatriciaTrieNodeWriter;
-
-class Ver4PatriciaTrieWritingHelper {
- public:
- Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
- : mBuffers(buffers) {}
-
- void writeToDictFile(const char *const dictDirPath, const int unigramCount,
- const int bigramCount) const;
-
- void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
-
- class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
- : public DynamicPtReadingHelper::TraversingEventListener {
- public:
- TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
- Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
- : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
-
- bool onAscend() { return true; }
-
- bool onDescend(const int ptNodeArrayPos) { return true; }
-
- bool onReadingPtNodeArrayTail() { return true; }
-
- bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
-
- Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
- const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
- };
-
- // For truncateUnigrams() and truncateBigrams().
- class DictProbability {
- public:
- DictProbability(const int dictPos, const int probability, const int timestamp)
- : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
-
- int getDictPos() const {
- return mDictPos;
- }
-
- int getProbability() const {
- return mProbability;
- }
-
- int getTimestamp() const {
- return mTimestamp;
- }
-
- private:
- DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
-
- int mDictPos;
- int mProbability;
- int mTimestamp;
- };
-
- // For truncateUnigrams() and truncateBigrams().
- class DictProbabilityComparator {
- public:
- bool operator()(const DictProbability &left, const DictProbability &right) {
- if (left.getProbability() != right.getProbability()) {
- return left.getProbability() > right.getProbability();
- }
- if (left.getTimestamp() != right.getTimestamp()) {
- return left.getTimestamp() < right.getTimestamp();
- }
- return left.getDictPos() > right.getDictPos();
- }
-
- private:
- DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
- };
-
- bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
- Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
- int *const outBigramCount);
-
- bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
- Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
-
- bool truncateBigrams(const int maxBigramCount);
-
- Ver4DictBuffers *const mBuffers;
-};
-} // namespace latinime
-
-#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 259dae4c6..f692882f2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,42 +18,11 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
-uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
- const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
- const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
- return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
-}
-
-uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
- int *const pos) const {
- const int value = readUint(size, *pos);
- *pos += size;
- return value;
-}
-
-void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
- int *const outCodePoints, int *outCodePointCount, int *const pos) const {
- const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
- if (readingPosIsInAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
- }
- *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
- getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
- if (readingPosIsInAdditionalBuffer) {
- *pos += mOriginalBufferSize;
- }
-}
-
-bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
- int writingPos = pos;
- return writeUintAndAdvancePosition(data, size, &writingPos);
-}
-
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
if (!(size >= 1 && size <= 4)) {
@@ -77,7 +46,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
}
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
- const int codePointCount, const bool writesTerminator, int *const pos) {
+ const int codePointCount, const bool writesTerminator ,int *const pos) {
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
codePoints, codePointCount, writesTerminator);
if (!checkAndPrepareWriting(*pos, size)) {
@@ -131,21 +100,4 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return true;
}
-bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) {
- int copyingPos = 0;
- const int tailPos = sourceBuffer->getTailPosition();
- const int maxDataChunkSize = sizeof(uint32_t);
- while (copyingPos < tailPos) {
- const int remainingSize = tailPos - copyingPos;
- const int copyingSize = (remainingSize >= maxDataChunkSize) ?
- maxDataChunkSize : remainingSize;
- const uint32_t data = sourceBuffer->readUint(copyingSize, copyingPos);
- if (!writeUint(data, copyingSize, copyingPos)) {
- return false;
- }
- copyingPos += copyingSize;
- }
- return true;
-}
-
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 76be16518..9dc34823c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,20 +32,12 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
- static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
-
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
- const int maxAdditionalBufferSize)
+ const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
- // Without original buffer.
- BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
- : mOriginalBuffer(0), mOriginalBufferSize(0),
- mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
- mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
-
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
@@ -71,13 +63,6 @@ class BufferWithExtendableBuffer {
}
}
- uint32_t readUint(const int size, const int pos) const;
-
- uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
-
- void readCodePointsAndAdvancePosition(const int maxCodePointCount,
- int *const outCodePoints, int *outCodePointCount, int *const pos) const;
-
AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize;
}
@@ -93,18 +78,15 @@ class BufferWithExtendableBuffer {
* Writing is allowed for original buffer, already written region of additional buffer and the
* tail of additional buffer.
*/
- bool writeUint(const uint32_t data, const int size, const int pos);
-
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
const bool writesTerminator, int *const pos);
- bool copy(const BufferWithExtendableBuffer *const sourceBuffer);
-
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
+ static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index ebdd523e1..0c1576818 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -114,24 +114,6 @@ class ByteArrayUtils {
return buffer[(*pos)++];
}
- static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
- const int size, const int pos) {
- // size must be in 1 to 4.
- ASSERT(size >= 1 && size <= 4);
- switch (size) {
- case 1:
- return ByteArrayUtils::readUint8(buffer, pos);
- case 2:
- return ByteArrayUtils::readUint16(buffer, pos);
- case 3:
- return ByteArrayUtils::readUint24(buffer, pos);
- case 4:
- return ByteArrayUtils::readUint32(buffer, pos);
- default:
- return 0;
- }
- }
-
/**
* Code Point Reading
*
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 442373b29..994826fa8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -17,14 +17,12 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include <cstdio>
+#include <cstring>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "utils/time_keeper.h"
namespace latinime {
@@ -32,81 +30,60 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- TimeKeeper::setCurrentTime();
switch (dictVersion) {
- case FormatUtils::VERSION_4:
- return createEmptyV4DictFile(filePath, attributeMap);
+ case 3:
+ return createEmptyV3DictFile(filePath, attributeMap);
default:
- AKLOGE("Cannot create dictionary %s because format version %d is not supported.",
- filePath, dictVersion);
+ // Only version 3 dictionary is supported for now.
return false;
}
}
-/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
+/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
- Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
- Ver4DictBuffers::createVer4DictBuffers(&headerPolicy);
- headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(),
- true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */,
- 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
- if (!DynamicPtWritingUtils::writeEmptyDictionary(
- dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) {
- AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
+ headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
+ true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
+ 0 /* extendedRegionSize */);
+ BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
return false;
}
- return dictBuffers.get()->flush(dirPath);
+ return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
}
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
- const int tmpFileNameBufSize = FileUtils::getFilePathWithSuffixBufSize(filePath,
- TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ const int tmpFileNameBufSize = strlen(filePath)
+ + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */;
// Name of a temporary file used for writing that is a connected string of original name and
// TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE.
char tmpFileName[tmpFileNameBufSize];
- FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE,
- tmpFileNameBufSize, tmpFileName);
- if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictHeader)) {
- AKLOGE("Dictionary header cannot be written to %s.", tmpFileName);
- return false;
- }
- if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictBody)) {
- AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName);
- return false;
- }
- if (rename(tmpFileName, filePath) != 0) {
- AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);;
+ snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath,
+ TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
+ FILE *const file = fopen(tmpFileName, "wb");
+ if (!file) {
+ AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
+ ASSERT(false);
return false;
}
- return true;
-}
-
-/* static */ bool DictFileWritingUtils::flushBufferToFileWithSuffix(const char *const basePath,
- const char *const suffix, const BufferWithExtendableBuffer *const buffer) {
- const int filePathBufSize = FileUtils::getFilePathWithSuffixBufSize(basePath, suffix);
- char filePath[filePathBufSize];
- FileUtils::getFilePathWithSuffix(basePath, suffix, filePathBufSize, filePath);
- return flushBufferToFile(filePath, buffer);
-}
-
-/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
- const BufferWithExtendableBuffer *const buffer) {
- FILE *const file = fopen(filePath, "wb");
- if (!file) {
- AKLOGE("File %s cannot be opened.", filePath);
+ // Write the dictionary header.
+ if (!writeBufferToFile(file, dictHeader)) {
+ remove(tmpFileName);
+ AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
ASSERT(false);
return false;
}
- if (!writeBufferToFile(file, buffer)) {
- remove(filePath);
- AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
- buffer->getTailPosition());
+ // Write the dictionary body.
+ if (!writeBufferToFile(file, dictBody)) {
+ remove(tmpFileName);
+ AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
ASSERT(false);
return false;
}
fclose(file);
+ rename(tmpFileName, filePath);
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
index bdf9fd63c..bd4ac66fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h
@@ -28,8 +28,6 @@ class BufferWithExtendableBuffer;
class DictFileWritingUtils {
public:
- static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
-
static bool createEmptyDictFile(const char *const filePath, const int dictVersion,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
@@ -37,17 +35,13 @@ class DictFileWritingUtils {
BufferWithExtendableBuffer *const dictHeader,
BufferWithExtendableBuffer *const dictBody);
- static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix,
- const BufferWithExtendableBuffer *const buffer);
-
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
- static bool createEmptyV4DictFile(const char *const filePath,
- const HeaderReadWriteUtils::AttributeMap *const attributeMap);
+ static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
- static bool flushBufferToFile(const char *const filePath,
- const BufferWithExtendableBuffer *const buffer);
+ static bool createEmptyV3DictFile(const char *const filePath,
+ const HeaderReadWriteUtils::AttributeMap *const attributeMap);
static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp
deleted file mode 100644
index 1f25cfa1e..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/utils/file_utils.h"
-
-#include <cstdio>
-#include <cstring>
-#include <dirent.h>
-#include <fcntl.h>
-#include <libgen.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-namespace latinime {
-
-// Returns -1 on error.
-/* static */ int FileUtils::getFileSize(const char *const filePath) {
- const int fd = open(filePath, O_RDONLY);
- if (fd == -1) {
- return -1;
- }
- struct stat statBuf;
- if (fstat(fd, &statBuf) != 0) {
- close(fd);
- return -1;
- }
- close(fd);
- return static_cast<int>(statBuf.st_size);
-}
-
-/* static */ bool FileUtils::existsDir(const char *const dirPath) {
- DIR *const dir = opendir(dirPath);
- if (dir == NULL) {
- return false;
- }
- closedir(dir);
- return true;
-}
-
-// Remove a directory and all files in the directory.
-/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) {
- DIR *const dir = opendir(dirPath);
- if (dir == NULL) {
- AKLOGE("Cannot open dir %s.", dirPath);
- return true;
- }
- struct dirent *dirent;
- while ((dirent = readdir(dir)) != NULL) {
- if (dirent->d_type != DT_REG) {
- continue;
- }
- const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name);
- char filePath[filePathBufSize];
- getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath);
- if (remove(filePath) != 0) {
- AKLOGE("Cannot remove file %s.", filePath);
- closedir(dir);
- return false;
- }
- }
- closedir(dir);
- if (remove(dirPath) != 0) {
- AKLOGE("Cannot remove directory %s.", dirPath);
- return false;
- }
- return true;
-}
-
-/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath,
- const char *const suffix) {
- return strlen(filePath) + strlen(suffix) + 1 /* terminator */;
-}
-
-/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath,
- const char *const suffix, const int filePathBufSize, char *const outFilePath) {
- snprintf(outFilePath, filePathBufSize, "%s%s", filePath, suffix);
-}
-
-/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath,
- const char *const fileName) {
- return strlen(dirPath) + 1 /* '/' */ + strlen(fileName) + 1 /* terminator */;
-}
-
-/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName,
- const int filePathBufSize, char *const outFilePath) {
- snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName);
-}
-
-/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath,
- const char *const suffix, const int outDirPathBufSize, char *const outDirPath) {
- const int filePathLength = strlen(filePath);
- const int suffixLength = strlen(suffix);
- if (filePathLength <= suffixLength) {
- AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).",
- filePath, filePathLength, suffix, suffixLength);
- return false;
- }
- const int resultFilePathLength = filePathLength - suffixLength;
- if (outDirPathBufSize <= resultFilePathLength) {
- AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d",
- filePath, suffix, outDirPathBufSize);
- return false;
- }
- if (strncmp(filePath + resultFilePathLength, suffix, suffixLength) != 0) {
- AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix);
- return false;
- }
- snprintf(outDirPath, resultFilePathLength + 1 /* terminator */, "%s", filePath);
- return true;
-}
-
-/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize,
- char *const outDirPath) {
- for (int i = strlen(filePath) - 1; i >= 0; --i) {
- if (filePath[i] == '/') {
- if (i >= outDirPathBufSize) {
- AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d",
- filePath, outDirPathBufSize);
- ASSERT(false);
- return;
- }
- snprintf(outDirPath, i + 1 /* terminator */, "%s", filePath);
- return;
- }
- }
-}
-
-/* static */ void FileUtils::getBasename(const char *const filePath,
- const int outNameBufSize, char *const outName) {
- const int filePathBufSize = strlen(filePath) + 1 /* terminator */;
- char filePathBuf[filePathBufSize];
- snprintf(filePathBuf, filePathBufSize, "%s", filePath);
- const char *const baseName = basename(filePathBuf);
- const int baseNameLength = strlen(baseName);
- if (baseNameLength >= outNameBufSize) {
- AKLOGE("outNameBufSize is too small. filePath: %s, outNameBufSize: %d",
- filePath, outNameBufSize);
- return;
- }
- snprintf(outName, baseNameLength + 1 /* terminator */, "%s", baseName);
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
deleted file mode 100644
index 3e84a3038..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_FILE_UTILS_H
-#define LATINIME_FILE_UTILS_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class FileUtils {
- public:
- // Returns -1 on error.
- static int getFileSize(const char *const filePath);
-
- static bool existsDir(const char *const dirPath);
-
- // Remove a directory and all files in the directory.
- static bool removeDirAndFiles(const char *const dirPath);
-
- static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix);
-
- static void getFilePathWithSuffix(const char *const filePath, const char *const suffix,
- const int filePathBufSize, char *const outFilePath);
-
- static int getFilePathBufSize(const char *const dirPath, const char *const fileName);
-
- static void getFilePath(const char *const dirPath, const char *const fileName,
- const int filePathBufSize, char *const outFilePath);
-
- // Returns whether the filePath have the suffix.
- static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix,
- const int dirPathBufSize, char *const outDirPath);
-
- static void getDirPath(const char *const filePath, const int dirPathBufSize,
- char *const outDirPath);
-
- static void getBasename(const char *const filePath, const int outNameBufSize,
- char *const outName);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
-};
-} // namespace latinime
-#endif /* LATINIME_FILE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 4050ad363..1632fd072 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
-
#include <cmath>
+#include <ctime>
#include <stdlib.h>
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
-#include "utils/time_keeper.h"
namespace latinime {
@@ -31,86 +31,76 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
+const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
+const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
+const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
+// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
+// duration of the decay is approximately 66hours.
+const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
-const int ForgettingCurveUtils::MAX_LEVEL = 3;
-const int ForgettingCurveUtils::MAX_COUNT = 3;
-const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
-const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
-const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
-const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
-
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
-/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
- const HistoricalInfo *const originalHistoricalInfo,
- const int newProbability, const int timestamp) {
- if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
- return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
- } else if (!originalHistoricalInfo->isValid()) {
- // Initial information.
- return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
- } else {
- const int updatedCount = originalHistoricalInfo->getCount() + 1;
- if (updatedCount > MAX_COUNT) {
- // The count exceeds the max value the level can be incremented.
- if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
- // The level is already max.
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(),
- originalHistoricalInfo->getCount());
- } else {
- // Level up.
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1,
- 0 /* count */);
- }
- } else {
- return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount);
- }
- }
-}
-
-/* static */ int ForgettingCurveUtils::decodeProbability(
- const HistoricalInfo *const historicalInfo) {
- const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
- return sProbabilityTable.getProbability(historicalInfo->getLevel(),
- min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
+void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
+ mCurrentTime = time(0);
}
-/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
- const int bigramProbability) {
- if (unigramProbability == NOT_A_PROBABILITY) {
+/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
+ const int encodedBigramProbability) {
+ if (encodedUnigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY);
+ } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
+ return backoff(decodeProbability(encodedUnigramProbability));
} else {
+ const int unigramProbability = decodeProbability(encodedUnigramProbability);
+ const int bigramProbability = decodeProbability(encodedBigramProbability);
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
-/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
- return historicalInfo->getLevel() > 0
- || getElapsedTimeStepCount(historicalInfo->getTimeStamp())
- < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
+// (i.e. unigram probability + bigram probability delta).
+/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
+ const int originalEncodedProbability, const int newProbability) {
+ if (originalEncodedProbability == NOT_A_PROBABILITY) {
+ // The bigram relation is not in this dictionary.
+ if (newProbability == NOT_A_PROBABILITY) {
+ // The bigram target is not in other dictionaries.
+ return 0;
+ } else {
+ return MIN_VALID_ENCODED_PROBABILITY;
+ }
+ } else {
+ if (newProbability != NOT_A_PROBABILITY
+ && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
+ return MIN_VALID_ENCODED_PROBABILITY;
+ }
+ return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
+ }
+}
+
+/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
+ return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}
-/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
- const HistoricalInfo *const originalHistoricalInfo) {
- if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
- return HistoricalInfo();
- }
- const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp());
- if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
- // No need to update historical info.
- return *originalHistoricalInfo;
+/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
+ const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
+ int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
+ // TODO: Implement the decay in more proper way.
+ for (int i = 0; i < decayIterationCount; ++i) {
+ const float currentRate = static_cast<float>(currentEncodedProbability)
+ / static_cast<float>(MAX_ENCODED_PROBABILITY);
+ const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
+ const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
+ if (thresholdToDecay < randValue) {
+ currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
+ 0);
+ }
}
- // Level down.
- const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
- const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
- originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
- const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() +
- levelDownAmount * (MAX_ELAPSED_TIME_STEP_COUNT + 1) * TIME_STEP_DURATION_IN_SECONDS;
- return HistoricalInfo(adjustedTimestamp,
- originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
+ return currentEncodedProbability;
}
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
@@ -126,14 +116,21 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
if (mindsBlockByDecay) {
return false;
}
- if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS
- < TimeKeeper::peekCurrentTime()) {
+ if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
// Time to decay.
return true;
}
return false;
}
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
+ if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else {
+ return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
+ }
+}
+
// See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) {
@@ -143,29 +140,15 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
}
-/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) {
- return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
-}
-
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
- mTable.resize(MAX_LEVEL + 1);
- for (int level = 0; level <= MAX_LEVEL; ++level) {
- mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
- const float initialProbability =
- static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
- for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) {
- if (level == 0) {
- mTable[level][timeStepCount] = NOT_A_PROBABILITY;
- continue;
- }
- const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
- const float probability = initialProbability
- * powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
- / static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
- * (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
- mTable[level][timeStepCount] =
- min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
- }
+ // Table entry is as follows:
+ // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+ // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+ for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+ const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+ static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+ mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 6ac8dc528..2ad423874 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -20,33 +20,45 @@
#include <vector>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
class DictionaryHeaderStructurePolicy;
+// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
+// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
+ class TimeKeeper {
+ public:
+ TimeKeeper() : mCurrentTime(0) {}
+ void setCurrentTime();
+ int peekCurrentTime() const { return mCurrentTime; };
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
+
+ int mCurrentTime;
+ };
+
static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
- static const HistoricalInfo createUpdatedHistoricalInfo(
- const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
- const int timestamp);
-
- static const HistoricalInfo createHistoricalInfoToSave(
- const HistoricalInfo *const originalHistoricalInfo);
-
- static int decodeProbability(const HistoricalInfo *const historicalInfo);
+ static TimeKeeper sTimeKeeper;
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
- static bool needsToKeep(const HistoricalInfo *const historicalInfo);
+ static int getUpdatedEncodedProbability(const int originalEncodedProbability,
+ const int newProbability);
+
+ static int isValidEncodedProbability(const int encodedProbability);
+
+ static int getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
@@ -58,32 +70,31 @@ class ForgettingCurveUtils {
public:
ProbabilityTable();
- int getProbability(const int level, const int elapsedTimeStepCount) const {
- return mTable[level][elapsedTimeStepCount];
+ int getProbability(const int encodedProbability) const {
+ if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
+ return NOT_A_PROBABILITY;
+ }
+ return mTable[encodedProbability];
}
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
- std::vector<std::vector<int> > mTable;
+ std::vector<int> mTable;
};
static const int MAX_COMPUTED_PROBABILITY;
+ static const int MAX_ENCODED_PROBABILITY;
+ static const int MIN_VALID_ENCODED_PROBABILITY;
+ static const int ENCODED_PROBABILITY_STEP;
+ static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS;
- static const int MAX_LEVEL;
- static const int MAX_COUNT;
- static const int MIN_VALID_LEVEL;
- static const int TIME_STEP_DURATION_IN_SECONDS;
- static const int MAX_ELAPSED_TIME_STEP_COUNT;
- static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
- static const int HALF_LIFE_TIME_IN_SECONDS;
-
static const ProbabilityTable sProbabilityTable;
- static int backoff(const int unigramProbability);
+ static int decodeProbability(const int encodedProbability);
- static int getElapsedTimeStepCount(const int timestamp);
+ static int backoff(const int unigramProbability);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index cd3c403fa..1d77d5c27 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -41,13 +41,10 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12;
// Dictionary format version number (2 bytes)
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
- // Conceptually this converts the hardcoded value of the bytes in the file into
- // the symbolic value we use in the code. But we want the constants to be the
- // same so we use them for both here.
- if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) {
+ if (ByteArrayUtils::readUint16(dict, 4) == 2) {
return VERSION_2;
- } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) {
- return VERSION_4;
+ } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
+ return VERSION_3;
} else {
return UNKNOWN_VERSION;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index eb2227d60..79ed0de29 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -29,10 +29,9 @@ namespace latinime {
class FormatUtils {
public:
enum FORMAT_VERSION {
- // These MUST have the same values as the relevant constants in FormatSpec.java.
- VERSION_2 = 2,
- VERSION_4 = 400,
- UNKNOWN_VERSION = -1
+ VERSION_2,
+ VERSION_3,
+ UNKNOWN_VERSION
};
// 32 bit magic number is stored at the beginning of the dictionary header to reject
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
deleted file mode 100644
index 428ca8626..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_HISTORICAL_INFO_H
-#define LATINIME_HISTORICAL_INFO_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class HistoricalInfo {
- public:
- // Invalid historical info.
- HistoricalInfo()
- : mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0) {}
-
- HistoricalInfo(const int timestamp, const int level, const int count)
- : mTimestamp(timestamp), mLevel(level), mCount(count) {}
-
- bool isValid() const {
- return mTimestamp != NOT_A_TIMESTAMP;
- }
-
- int getTimeStamp() const {
- return mTimestamp;
- }
-
- int getLevel() const {
- return mLevel;
- }
-
- int getCount() const {
- return mCount;
- }
-
- private:
- // Copy constructor is public to use this class as a type of return value.
- DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
-
- const int mTimestamp;
- const int mLevel;
- const int mCount;
-};
-} // namespace latinime
-#endif /* LATINIME_HISTORICAL_INFO_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
deleted file mode 100644
index e88d6e0a9..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
-
-#include <cerrno>
-#include <climits>
-#include <cstdio>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h>
-
-#include "suggest/policyimpl/dictionary/utils/file_utils.h"
-
-namespace latinime {
-
-/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
- const char *const path, const int bufferOffset, const int bufferSize,
- const bool isUpdatable) {
- const int mmapFd = open(path, O_RDONLY);
- if (mmapFd < 0) {
- AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
- return MmappedBufferPtr(0);
- }
- const int pagesize = sysconf(_SC_PAGESIZE);
- const int offset = bufferOffset % pagesize;
- int alignedOffset = bufferOffset - offset;
- int alignedSize = bufferSize + offset;
- const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
- void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
- alignedOffset);
- if (mmappedBuffer == MAP_FAILED) {
- AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
- close(mmapFd);
- return MmappedBufferPtr(0);
- }
- uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
- if (!buffer) {
- AKLOGE("DICT: buffer is null");
- close(mmapFd);
- return MmappedBufferPtr(0);
- }
- return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize,
- mmapFd, isUpdatable));
-}
-
-/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
- const char *const path, const bool isUpdatable) {
- const int fileSize = FileUtils::getFileSize(path);
- if (fileSize == -1) {
- return MmappedBufferPtr(0);
- } else if (fileSize == 0) {
- return MmappedBufferPtr(new MmappedBuffer(isUpdatable));
- } else {
- return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
- }
-}
-
-/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer(
- const char *const dirPath, const char *const fileName, const bool isUpdatable) {
- const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
- char filePath[filePathBufferSize];
- const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
- fileName);
- if (filePathLength >= filePathBufferSize) {
- return 0;
- }
- return openBuffer(filePath, isUpdatable);
-}
-
-MmappedBuffer::~MmappedBuffer() {
- if (mAlignedSize == 0) {
- return;
- }
- int ret = munmap(mMmappedBuffer, mAlignedSize);
- if (ret != 0) {
- AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
- }
- ret = close(mMmapFd);
- if (ret != 0) {
- AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
- }
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 73a733b0c..6b69116eb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -17,27 +17,58 @@
#ifndef LATINIME_MMAPPED_BUFFER_H
#define LATINIME_MMAPPED_BUFFER_H
+#include <cerrno>
+#include <fcntl.h>
#include <stdint.h>
+#include <sys/mman.h>
+#include <unistd.h>
#include "defines.h"
-#include "utils/exclusive_ownership_pointer.h"
namespace latinime {
class MmappedBuffer {
public:
- typedef ExclusiveOwnershipPointer<MmappedBuffer> MmappedBufferPtr;
-
- static MmappedBufferPtr openBuffer(const char *const path,
- const int bufferOffset, const int bufferSize, const bool isUpdatable);
-
- // Mmap entire file.
- static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable);
-
- static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName,
- const bool isUpdatable);
+ static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
+ const int bufferSize, const bool isUpdatable) {
+ const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
+ const int mmapFd = open(path, openMode);
+ if (mmapFd < 0) {
+ AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
+ return 0;
+ }
+ const int pagesize = getpagesize();
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
+ const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
+ AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
+ close(mmapFd);
+ return 0;
+ }
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
+ if (!buffer) {
+ AKLOGE("DICT: buffer is null");
+ close(mmapFd);
+ return 0;
+ }
+ return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
+ isUpdatable);
+ }
- ~MmappedBuffer();
+ ~MmappedBuffer() {
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
+ }
+ ret = close(mMmapFd);
+ if (ret != 0) {
+ AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno);
+ }
+ }
AK_FORCE_INLINE uint8_t *getBuffer() const {
return mBuffer;
@@ -58,11 +89,6 @@ class MmappedBuffer {
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
- // Empty file. We have to handle an empty file as a valid part of a dictionary.
- AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
- : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0),
- mIsUpdatable(isUpdatable) {}
-
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
deleted file mode 100644
index 4ad82f9f7..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
-
-namespace latinime {
-
-const int SparseTable::NOT_EXIST = -1;
-const int SparseTable::INDEX_SIZE = 4;
-
-bool SparseTable::contains(const int id) const {
- const int readingPos = getPosInIndexTable(id);
- if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
- return false;
- }
- const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos);
- return index != NOT_EXIST;
-}
-
-uint32_t SparseTable::get(const int id) const {
- const int indexTableReadingPos = getPosInIndexTable(id);
- const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos);
- const int contentTableReadingPos = getPosInContentTable(id, index);
- return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos);
-}
-
-bool SparseTable::set(const int id, const uint32_t value) {
- const int posInIndexTable = getPosInIndexTable(id);
- // Extends the index table if needed.
- if (mIndexTableBuffer->getTailPosition() < posInIndexTable) {
- int tailPos = mIndexTableBuffer->getTailPosition();
- while(tailPos < posInIndexTable) {
- if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) {
- AKLOGE("cannot extend index table. tailPos: %d to: %d", tailPos, posInIndexTable);
- return false;
- }
- }
- }
- if (contains(id)) {
- // The entry is already in the content table.
- const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable);
- if (!mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index))) {
- AKLOGE("cannot update value %d. pos: %d, tailPos: %d, mDataSize: %d", value,
- getPosInContentTable(id, index), mContentTableBuffer->getTailPosition(),
- mDataSize);
- return false;
- }
- return true;
- }
- // The entry is not in the content table.
- // Create new entry in the content table.
- const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
- if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) {
- AKLOGE("cannot write index %d. pos %d", index, posInIndexTable);
- return false;
- }
- // Write a new block that containing the entry to be set.
- int writingPos = getPosInContentTable(0 /* id */, index);
- for (int i = 0; i < mBlockSize; ++i) {
- if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize,
- &writingPos)) {
- AKLOGE("cannot write content table to extend. writingPos: %d, tailPos: %d, "
- "mDataSize: %d", writingPos, mContentTableBuffer->getTailPosition(), mDataSize);
- return false;
- }
- }
- return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
-}
-
-int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
- return contentTablePos / mDataSize / mBlockSize;
-}
-
-int SparseTable::getPosInIndexTable(const int id) const {
- return (id / mBlockSize) * INDEX_SIZE;
-}
-
-int SparseTable::getPosInContentTable(const int id, const int index) const {
- const int offset = id % mBlockSize;
- return (index * mBlockSize + offset) * mDataSize;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
deleted file mode 100644
index 21c167506..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_SPARSE_TABLE_H
-#define LATINIME_SPARSE_TABLE_H
-
-#include <stdint.h>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-// Note that there is a corresponding implementation in SparseTable.java.
-// TODO: Support multiple content buffers.
-class SparseTable {
- public:
- SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
- BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
- const int dataSize)
- : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
- mBlockSize(blockSize), mDataSize(dataSize) {}
-
- bool contains(const int id) const;
-
- uint32_t get(const int id) const;
-
- bool set(const int id, const uint32_t value);
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
-
- int getIndexFromContentTablePos(const int contentTablePos) const;
-
- int getPosInIndexTable(const int id) const;
-
- int getPosInContentTable(const int id, const int index) const;
-
- static const int NOT_EXIST;
- static const int INDEX_SIZE;
-
- BufferWithExtendableBuffer *const mIndexTableBuffer;
- BufferWithExtendableBuffer *const mContentTableBuffer;
- const int mBlockSize;
- const int mDataSize;
-};
-} // namespace latinime
-#endif /* LATINIME_SPARSE_TABLE_H */
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
index fd0ac9eb6..007c19e0a 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h
@@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int point0Index = dicNode->getInputIndex(0);
- return dicNode->isTerminalDicNode()
+ return dicNode->isTerminalWordNode()
&& traverseSession->getProximityInfoState(0)->
hasSpaceProximity(point0Index);
}
@@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
if (dicNode->isCompletion(inputSize)) {
return false;
}
- if (!dicNode->isTerminalDicNode()) {
+ if (!dicNode->isTerminalWordNode()) {
return false;
}
const int16_t pointIndex = dicNode->getInputIndex(0);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 54f65c786..5b6b5e874 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -23,64 +23,39 @@ namespace latinime {
const TypingWeighting TypingWeighting::sInstance;
-ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
+ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode,
const DicNode *const dicNode) const {
switch (correctionType) {
case CT_MATCH:
if (isProximityDicNode(traverseSession, dicNode)) {
- return ErrorTypeUtils::PROXIMITY_CORRECTION;
- } else if (dicNode->isInDigraph()) {
- return ErrorTypeUtils::MATCH_WITH_DIGRAPH;
+ return ET_PROXIMITY_CORRECTION;
} else {
- // Compare the node code point with original primary code point on the keyboard.
- const ProximityInfoState *const pInfoState =
- traverseSession->getProximityInfoState(0);
- const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
- dicNode->getInputIndex(0));
- const int nodeCodePoint = dicNode->getNodeCodePoint();
- if (primaryOriginalCodePoint == nodeCodePoint) {
- // Node code point is same as original code point on the keyboard.
- return ErrorTypeUtils::NOT_AN_ERROR;
- } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
- CharUtils::toLowerCase(nodeCodePoint)) {
- // Only cases of the code points are different.
- return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
- } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
- CharUtils::toBaseCodePoint(nodeCodePoint)) {
- // Node code point is a variant of original code point.
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
- } else {
- // Node code point is a variant of original code point and the cases are also
- // different.
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
- | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
- }
+ return ET_NOT_AN_ERROR;
}
- break;
case CT_ADDITIONAL_PROXIMITY:
- return ErrorTypeUtils::PROXIMITY_CORRECTION;
+ return ET_PROXIMITY_CORRECTION;
case CT_OMISSION:
if (parentDicNode->canBeIntentionalOmission()) {
- return ErrorTypeUtils::INTENTIONAL_OMISSION;
+ return ET_INTENTIONAL_OMISSION;
} else {
- return ErrorTypeUtils::EDIT_CORRECTION;
+ return ET_EDIT_CORRECTION;
}
break;
case CT_SUBSTITUTION:
case CT_INSERTION:
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
- return ErrorTypeUtils::EDIT_CORRECTION;
+ return ET_EDIT_CORRECTION;
case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
- return ErrorTypeUtils::NEW_WORD;
+ return ET_NEW_WORD;
case CT_TERMINAL:
- return ErrorTypeUtils::NOT_AN_ERROR;
+ return ET_NOT_AN_ERROR;
case CT_COMPLETION:
- return ErrorTypeUtils::COMPLETION;
+ return ET_COMPLETION;
default:
- return ErrorTypeUtils::NOT_AN_ERROR;
+ return ET_NOT_AN_ERROR;
}
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
index 41314ef52..9f0a331e3 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h
@@ -19,7 +19,6 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
-#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/touch_position_correction_utils.h"
#include "suggest/core/policy/weighting.h"
#include "suggest/core/session/dic_traverse_session.h"
@@ -205,7 +204,7 @@ class TypingWeighting : public Weighting {
return cost * traverseSession->getMultiWordCostMultiplier();
}
- ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType,
+ ErrorType getErrorType(const CorrectionType correctionType,
const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const;
diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h
deleted file mode 100644
index 081802e8b..000000000
--- a/native/jni/src/utils/exclusive_ownership_pointer.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
-#define LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H
-
-#include "defines.h"
-
-namespace latinime {
-
-template<class T>
-class ExclusiveOwnershipPointer {
- public:
- // This instance become an owner of the raw pointer.
- AK_FORCE_INLINE ExclusiveOwnershipPointer(T *const rawPointer)
- : mPointer(rawPointer),
- mSharedOwnerPtr(new (ExclusiveOwnershipPointer<T> *)(this)) {}
-
- // Move the ownership.
- AK_FORCE_INLINE ExclusiveOwnershipPointer(const ExclusiveOwnershipPointer<T> &pointer)
- : mPointer(pointer.mPointer), mSharedOwnerPtr(pointer.mSharedOwnerPtr) {
- transferOwnership(&pointer);
- }
-
- AK_FORCE_INLINE ~ExclusiveOwnershipPointer() {
- deletePointersIfHavingOwnership();
- }
-
- AK_FORCE_INLINE T *get() const {
- return mPointer;
- }
-
- private:
- // This class allows to copy and ensures only one instance has the ownership of the
- // managed pointer.
- DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer);
- DISALLOW_ASSIGNMENT_OPERATOR(ExclusiveOwnershipPointer);
-
- void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
- if (*mSharedOwnerPtr != src) {
- AKLOGE("Failed to transfer the ownership because src is not the current owner."
- "src: %p, owner: %p", src, *mSharedOwnerPtr);
- ASSERT(false);
- return;
- }
- // Transfer the ownership from src to this instance.
- *mSharedOwnerPtr = this;
- }
-
- void deletePointersIfHavingOwnership() {
- if (mSharedOwnerPtr && *mSharedOwnerPtr == this) {
- if (mPointer) {
- if (DEBUG_DICT) {
- AKLOGI("Releasing pointer: %p", mPointer);
- }
- delete mPointer;
- }
- delete mSharedOwnerPtr;
- }
- }
-
- T *mPointer;
- // mSharedOwnerPtr points a shared memory space where the instance which has the ownership is
- // stored.
- ExclusiveOwnershipPointer<T> **mSharedOwnerPtr;
-};
-} // namespace latinime
-#endif /* LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H */
diff --git a/native/jni/src/utils/time_keeper.cpp b/native/jni/src/utils/time_keeper.cpp
deleted file mode 100644
index 026284060..000000000
--- a/native/jni/src/utils/time_keeper.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/time_keeper.h"
-
-#include <ctime>
-
-namespace latinime {
-
-int TimeKeeper::sCurrentTime;
-bool TimeKeeper::sSetForTesting;
-
-/* static */ void TimeKeeper::setCurrentTime() {
- if (!sSetForTesting) {
- sCurrentTime = time(0);
- }
-}
-
-/* static */ void TimeKeeper::startTestModeWithForceCurrentTime(const int currentTime) {
- sCurrentTime = currentTime;
- sSetForTesting = true;
-}
-
-/* static */ void TimeKeeper::stopTestMode() {
- sSetForTesting = false;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/utils/time_keeper.h b/native/jni/src/utils/time_keeper.h
deleted file mode 100644
index d066757e4..000000000
--- a/native/jni/src/utils/time_keeper.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_TIME_KEEPER_H
-#define LATINIME_TIME_KEEPER_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class TimeKeeper {
- public:
- static void setCurrentTime();
-
- static void startTestModeWithForceCurrentTime(const int currentTime);
-
- static void stopTestMode();
-
- static int peekCurrentTime() { return sCurrentTime; };
-
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(TimeKeeper);
-
- static int sCurrentTime;
- static bool sSetForTesting;
-};
-} // namespace latinime
-#endif /* LATINIME_TIME_KEEPER_H */