diff options
Diffstat (limited to 'native')
122 files changed, 3406 insertions, 7183 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index e11e706f3..ca6a77997 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -57,9 +57,7 @@ LATIN_IME_CORE_SRC_FILES := \ bloom_filter.cpp \ dictionary.cpp \ digraph_utils.cpp \ - error_type_utils.cpp \ - multi_bigram_map.cpp \ - unigram_property.cpp) \ + multi_bigram_map.cpp) \ $(addprefix suggest/core/layout/, \ additional_proximity_chars.cpp \ proximity_info.cpp \ @@ -69,45 +67,27 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ + bigram/bigram_list_read_write_utils.cpp \ + bigram/dynamic_bigram_list_policy.cpp \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ - structure/dictionary_structure_with_buffer_policy_factory.cpp) \ - $(addprefix suggest/policyimpl/dictionary/bigram/, \ - bigram_list_read_write_utils.cpp \ - ver4_bigram_list_policy.cpp) \ - $(addprefix suggest/policyimpl/dictionary/structure/pt_common/, \ - dynamic_pt_gc_event_listeners.cpp \ - dynamic_pt_reading_helper.cpp \ - dynamic_pt_reading_utils.cpp \ - dynamic_pt_updating_helper.cpp \ - dynamic_pt_writing_utils.cpp) \ - $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ + dictionary_structure_with_buffer_policy_factory.cpp \ + dynamic_patricia_trie_gc_event_listeners.cpp \ + dynamic_patricia_trie_node_reader.cpp \ + dynamic_patricia_trie_policy.cpp \ + dynamic_patricia_trie_reading_helper.cpp \ + dynamic_patricia_trie_reading_utils.cpp \ + dynamic_patricia_trie_writing_helper.cpp \ + dynamic_patricia_trie_writing_utils.cpp \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp) \ - $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ - ver4_dict_buffers.cpp \ - ver4_dict_constants.cpp \ - ver4_patricia_trie_node_reader.cpp \ - ver4_patricia_trie_node_writer.cpp \ - ver4_patricia_trie_policy.cpp \ - 
ver4_patricia_trie_reading_utils.cpp \ - ver4_patricia_trie_writing_helper.cpp) \ - $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ - bigram_dict_content.cpp \ - probability_dict_content.cpp \ - shortcut_dict_content.cpp \ - sparse_table_dict_content.cpp \ - terminal_position_lookup_table.cpp) \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ dict_file_writing_utils.cpp \ - file_utils.cpp \ forgetting_curve_utils.cpp \ - format_utils.cpp \ - mmapped_buffer.cpp \ - sparse_table.cpp) \ + format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ scoring_params.cpp \ @@ -118,8 +98,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix utils/, \ autocorrection_threshold_utils.cpp \ char_utils.cpp \ - log_utils.cpp \ - time_keeper.cpp) + log_utils.cpp) LOCAL_SRC_FILES := \ $(LATIN_IME_JNI_SRC_FILES) \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 716bda5a7..8f21c50ec 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -24,9 +24,8 @@ #include "jni.h" #include "jni_common.h" #include "suggest/core/dictionary/dictionary.h" -#include "suggest/core/dictionary/unigram_property.h" #include "suggest/core/suggest_options.h" -#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" @@ -87,11 +86,11 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), 
sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = + DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy = DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), isUpdatable == JNI_TRUE); - if (!dictionaryStructureWithBufferPolicy.get()) { + if (!dictionaryStructureWithBufferPolicy) { return 0; } @@ -136,12 +135,6 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic delete dictionary; } -static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) { - Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) return 0; - return dictionary->getFormatVersionNumber(); -} - static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, @@ -259,21 +252,6 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c word1Length); } -static void latinime_BinaryDictionary_getUnigramProperty(JNIEnv *env, jclass clazz, - jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags, - jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets, - jobject outShortcutProbabilities) { - Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) return; - const jsize wordLength = env->GetArrayLength(word); - int wordCodePoints[wordLength]; - env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); - const UnigramProperty unigramProperty = dictionary->getUnigramProperty( - wordCodePoints, wordLength); - unigramProperty.outputProperties(env, outCodePoints, outFlags, outProbability, - outHistoricalInfo, 
outShortcutTargets, outShortcutProbabilities); -} - static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz, jintArray before, jintArray after, jint score) { jsize beforeLength = env->GetArrayLength(before); @@ -299,8 +277,7 @@ static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jclass clazz, ji } static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, - jintArray word, jint probability, jintArray shortcutTarget, jint shortuctProbability, - jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { + jintArray word, jint probability) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return; @@ -308,17 +285,11 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jsize wordLength = env->GetArrayLength(word); int codePoints[wordLength]; env->GetIntArrayRegion(word, 0, wordLength, codePoints); - jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0; - int shortcutTargetCodePoints[shortcutLength]; - if (shortcutTarget) { - env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); - } - dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints, - shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp); + dictionary->addUnigramWord(codePoints, wordLength, probability); } static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, - jintArray word0, jintArray word1, jint probability, jint timestamp) { + jintArray word0, jintArray word1, jint probability) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) { return; @@ -330,7 +301,7 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); dictionary->addBigramWords(word0CodePoints, word0Length, 
word1CodePoints, - word1Length, probability, timestamp); + word1Length, probability); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -349,87 +320,6 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz word1Length); } -// Returns how many language model params are processed. -static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz, - jlong dict, jobjectArray languageModelParams, jint startIndex) { - Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); - if (!dictionary) { - return 0; - } - jsize languageModelParamCount = env->GetArrayLength(languageModelParams); - if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) { - return 0; - } - jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0); - jclass languageModelParamClass = env->GetObjectClass(languageModelParam); - env->DeleteLocalRef(languageModelParam); - - jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I"); - jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I"); - jfieldID unigramProbabilityFieldId = - env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I"); - jfieldID bigramProbabilityFieldId = - env->GetFieldID(languageModelParamClass, "mBigramProbability", "I"); - jfieldID timestampFieldId = - env->GetFieldID(languageModelParamClass, "mTimestamp", "I"); - jfieldID shortcutTargetFieldId = - env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I"); - jfieldID shortcutProbabilityFieldId = - env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I"); - jfieldID isNotAWordFieldId = - env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z"); - jfieldID isBlacklistedFieldId = - env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z"); - env->DeleteLocalRef(languageModelParamClass); - - for (int i = startIndex; i < languageModelParamCount; 
++i) { - jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i); - // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the - // other hand, word0 can be null and then it means the set of params doesn't contain bigram - // information. - jintArray word0 = static_cast<jintArray>( - env->GetObjectField(languageModelParam, word0FieldId)); - jsize word0Length = word0 ? env->GetArrayLength(word0) : 0; - int word0CodePoints[word0Length]; - if (word0) { - env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); - } - jintArray word1 = static_cast<jintArray>( - env->GetObjectField(languageModelParam, word1FieldId)); - jsize word1Length = env->GetArrayLength(word1); - int word1CodePoints[word1Length]; - env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId); - jint timestamp = env->GetIntField(languageModelParam, timestampFieldId); - jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId); - jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId); - jintArray shortcutTarget = static_cast<jintArray>( - env->GetObjectField(languageModelParam, shortcutTargetFieldId)); - jsize shortcutLength = shortcutTarget ? 
env->GetArrayLength(shortcutTarget) : 0; - int shortcutTargetCodePoints[shortcutLength]; - if (shortcutTarget) { - env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); - } - jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId); - dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, - shortcutTargetCodePoints, shortcutLength, shortcutProbability, - isNotAWord, isBlacklisted, timestamp); - if (word0) { - jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); - dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length, - bigramProbability, timestamp); - } - if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { - return i + 1; - } - env->DeleteLocalRef(word0); - env->DeleteLocalRef(word1); - env->DeleteLocalRef(shortcutTarget); - env->DeleteLocalRef(languageModelParam); - } - return languageModelParamCount; -} - static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz, jlong dict, jint unigramProbability, jint bigramProbability) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); @@ -453,7 +343,7 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, static const int GET_PROPERTY_RESULT_LENGTH = 100; char resultChars[GET_PROPERTY_RESULT_LENGTH]; resultChars[0] = '\0'; - dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH); + dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH); return env->NewStringUTF(resultChars); } @@ -474,11 +364,6 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_close) }, { - const_cast<char *>("getFormatVersionNative"), - const_cast<char *>("(J)I"), - reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion) - }, - { const_cast<char *>("flushNative"), const_cast<char 
*>("(JLjava/lang/String;)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_flush) @@ -509,11 +394,6 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) }, { - const_cast<char *>("getUnigramPropertyNative"), - const_cast<char *>("(J[I[I[Z[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), - reinterpret_cast<void *>(latinime_BinaryDictionary_getUnigramProperty) - }, - { const_cast<char *>("calcNormalizedScoreNative"), const_cast<char *>("([I[II)F"), reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore) @@ -525,12 +405,12 @@ static const JNINativeMethod sMethods[] = { }, { const_cast<char *>("addUnigramWordNative"), - const_cast<char *>("(J[II[IIZZI)V"), + const_cast<char *>("(J[II)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) }, { const_cast<char *>("addBigramWordsNative"), - const_cast<char *>("(J[I[III)V"), + const_cast<char *>("(J[I[II)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) }, { @@ -539,12 +419,6 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) }, { - const_cast<char *>("addMultipleDictionaryEntriesNative"), - const_cast<char *>( - "(J[Lcom/android/inputmethod/latin/BinaryDictionary$LanguageModelParam;I)I"), - reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries) - }, - { const_cast<char *>("calculateProbabilityNative"), const_cast<char *>("(JII)I"), reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative) diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 9a26fe051..742e388e4 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -298,7 +298,6 @@ static inline void prof_out(void) { #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define NOT_A_DICT_POS (S_INT_MIN) -#define NOT_A_TIMESTAMP (-1) // A special value to mean the first word confidence makes no 
sense in this case, // e.g. this is not a multi-word suggestion. @@ -342,21 +341,12 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu #define INPUTLENGTH_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) -#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \ - TypeName() - -#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \ - TypeName(const TypeName&) - -#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \ - void operator=(const TypeName&) - #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - DISALLOW_COPY_CONSTRUCTOR(TypeName); \ - DISALLOW_ASSIGNMENT_OPERATOR(TypeName) + TypeName(const TypeName&); \ + void operator=(const TypeName&) #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ - DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \ + TypeName(); \ DISALLOW_COPY_AND_ASSIGN(TypeName) // Used as a return value for character comparison @@ -402,4 +392,24 @@ typedef enum { // Create new word with space substitution CT_NEW_WORD_SPACE_SUBSTITUTION, } CorrectionType; + +// ErrorType is mainly decided by CorrectionType but it is also depending on if +// the correction has really been performed or not. +typedef enum { + // Substitution, omission and transposition + ET_EDIT_CORRECTION, + // Proximity error + ET_PROXIMITY_CORRECTION, + // Completion + ET_COMPLETION, + // New word + // TODO: Remove. + // A new word error should be an edit correction error or a proximity correction error. + ET_NEW_WORD, + // Treat error as an intentional omission when the CorrectionType is omission and the node can + // be intentional omission. + ET_INTENTIONAL_OMISSION, + // Not treated as an error. 
Tracked for checking exact match + ET_NOT_AN_ERROR +} ErrorType; #endif // LATINIME_DEFINES_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 0b2b4a9e8..49cfdecac 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -99,7 +99,7 @@ class DicNode { virtual ~DicNode() {} // Init for copy - void initByCopy(const DicNode *const dicNode) { + void initByCopy(const DicNode *dicNode) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init(&dicNode->mDicNodeProperties); @@ -107,25 +107,25 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // Init for root with prevWordPtNodePos which is used for bigram - void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { + // Init for root with prevWordNodePos which is used for bigram + void initAsRoot(const int rootGroupPos, const int prevWordNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); - mDicNodeState.init(prevWordPtNodePos); + mDicNodeState.init(prevWordNodePos); PROF_NODE_RESET(mProfiler); } // Init for root with previous word - void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { + void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* 
nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -138,7 +138,7 @@ class DicNode { mDicNodeState.mDicNodeStatePrevWord.init( dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1, dicNode->mDicNodeProperties.getProbability(), - dicNode->mDicNodeProperties.getPtNodePos(), + dicNode->mDicNodeProperties.getPos(), dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord, dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), @@ -148,27 +148,26 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - void initAsPassingChild(DicNode *parentDicNode) { + void initAsPassingChild(DicNode *parentNode) { mIsUsed = true; - mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; - const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); - mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); - mDicNodeState.init(&parentDicNode->mDicNodeState); - PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); + mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; + const int c = parentNode->getNodeTypedCodePoint(); + mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); + mDicNodeState.init(&parentNode->mDicNodeState); + PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); } - void initAsChild(const DicNode *const dicNode, const int ptNodePos, - const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, - const bool hasChildren, const bool isBlacklistedOrNotAWord, - const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { + void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos, + const int probability, const bool isTerminal, const bool hasChildren, + const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, + const int *const 
mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], - probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, - newLeavingDepth); + mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability, + isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -235,7 +234,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -247,30 +246,29 @@ class DicNode { } // Used to get bigram probability in DicNodeUtils - int getPtNodePos() const { - return mDicNodeProperties.getPtNodePos(); + int getPos() const { + return mDicNodeProperties.getPos(); } // Used to get bigram probability in DicNodeUtils - int getPrevWordTerminalPtNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); + int getPrevWordPos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); } // Used in DicNodeUtils - int getChildrenPtNodeArrayPos() const { - return mDicNodeProperties.getChildrenPtNodeArrayPos(); + int getChildrenPos() const { + return mDicNodeProperties.getChildrenPos(); } int getProbability() const { return mDicNodeProperties.getProbability(); } - AK_FORCE_INLINE bool isTerminalDicNode() const { - const bool isTerminalPtNode = mDicNodeProperties.isTerminal(); - const int 
currentDicNodeDepth = getNodeCodePointCount(); - const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth(); - return isTerminalPtNode && currentDicNodeDepth > 0 - && currentDicNodeDepth == terminalDicNodeDepth; + AK_FORCE_INLINE bool isTerminalWordNode() const { + const bool isTerminalNodes = mDicNodeProperties.isTerminal(); + const int currentNodeDepth = getNodeCodePointCount(); + const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth(); + return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; } bool shouldBeFilteredBySafetyNetForBigram() const { @@ -376,8 +374,8 @@ class DicNode { } // Used to commit input partially - int getPrevWordPtNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); + int getPrevWordNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); } AK_FORCE_INLINE const int *getOutputWordBuf() const { @@ -412,7 +410,7 @@ class DicNode { // TODO: Remove once touch path is merged into ProximityInfoState // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph. 
int getNodeCodePoint() const { - const int codePoint = mDicNodeProperties.getDicNodeCodePoint(); + const int codePoint = mDicNodeProperties.getNodeCodePoint(); const DigraphUtils::DigraphCodePointIndex digraphIndex = mDicNodeState.mDicNodeStateScoring.getDigraphIndex(); if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) { @@ -425,8 +423,8 @@ class DicNode { // Utils for cost calculation // //////////////////////////////// AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const { - return mDicNodeProperties.getDicNodeCodePoint() - == dicNode->mDicNodeProperties.getDicNodeCodePoint(); + return mDicNodeProperties.getNodeCodePoint() + == dicNode->mDicNodeProperties.getNodeCodePoint(); } // TODO: remove @@ -576,8 +574,7 @@ class DicNode { // Caveat: Must not be called outside Weighting // This restriction is guaranteed by "friend" AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost, - const bool doNormalization, const int inputSize, - const ErrorTypeUtils::ErrorType errorType) { + const bool doNormalization, const int inputSize, const ErrorType errorType) { if (DEBUG_GEO_FULL) { LOGI_SHOW_ADD_COST_PROP; } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 5540b6df5..ec65114c7 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -22,6 +22,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "utils/char_utils.h" namespace latinime { @@ -31,20 +32,19 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordPtNodePos, DicNode *const newRootDicNode) { - newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), 
prevWordPtNodePos); + const int prevWordNodePos, DicNode *const newRootNode) { + newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { - newRootDicNode->initAsRootWithPreviousWord( - prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); + DicNode *const prevWordLastNode, DicNode *const newRootNode) { + newRootNode->initAsRootWithPreviousWord( + prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); } -/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, - DicNode *const destDicNode) { - destDicNode->initByCopy(srcDicNode); +/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { + destNode->initByCopy(srcNode); } /////////////////////////////////// @@ -52,14 +52,14 @@ namespace latinime { /////////////////////////////////// /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNodeVector *const childDicNodes) { + DicNodeVector *childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { return; } if (!dicNode->isLeavingNode()) { childDicNodes->pushPassingChild(dicNode); } else { - dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); + dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); } } @@ -71,11 +71,11 @@ namespace latinime { */ /* static */ float DicNodeUtils::getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { - if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { + const DicNode *const node, MultiBigramMap *multiBigramMap) { + if 
(node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) @@ -85,19 +85,19 @@ namespace latinime { /* static */ int DicNodeUtils::getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { - const int unigramProbability = dicNode->getProbability(); - const int ptNodePos = dicNode->getPtNodePos(); - const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); - if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { + const DicNode *const node, MultiBigramMap *multiBigramMap) { + const int unigramProbability = node->getProbability(); + const int wordPos = node->getPos(); + const int prevWordPos = node->getPrevWordPos(); + if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, - prevWordTerminalPtNodePos, ptNodePos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, + wordPos, unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); @@ -109,7 +109,7 @@ namespace latinime { // TODO: Move to char_utils? 
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, int *const dest) { + const int *const src1, const int16_t length1, int *dest) { int actualLength0 = 0; for (int i = 0; i < length0; ++i) { if (src0[i] == 0) { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 3f1514a52..3fb351a61 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -31,20 +31,20 @@ class MultiBigramMap; class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, - const int16_t length1, int *const dest); + const int16_t length1, int *dest); static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordPtNodePos, DicNode *const newRootDicNode); + const int prevWordNodePos, DicNode *newRootNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); - static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); + DicNode *prevWordLastNode, DicNode *newRootNode); + static void initByCopy(DicNode *srcNode, DicNode *destNode); static void getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *childDicNodes); static float getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); + const DicNode *const node, MultiBigramMap *const multiBigramMap); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); @@ -53,7 +53,7 @@ class DicNodeUtils { static int getBigramNodeProbability( const 
DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); + const DicNode *const node, MultiBigramMap *multiBigramMap); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 9364e7751..42addae8d 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -62,14 +62,14 @@ class DicNodeVector { mDicNodes.back().initAsPassingChild(dicNode); } - void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos, - const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, - const bool hasChildren, const bool isBlacklistedOrNotAWord, - const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { + void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos, + const int probability, const bool isTerminal, const bool hasChildren, + const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, + const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability, - isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal, + hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index c41a7243a..9e0f62ceb 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -24,14 +24,15 @@ namespace latinime { /** - * PtNode 
information related to the DicNode from the lexicon trie. + * Node for traversing the lexicon trie. */ +// TODO: Introduce a dictionary node class which has attribute members required to understand the +// dictionary structure. class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), - mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), - mDepth(0), mLeavingDepth(0) {} + : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false), + mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} virtual ~DicNodeProperties() {} @@ -39,57 +40,57 @@ class DicNodeProperties { void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth) { - mPtNodePos = pos; - mChildrenPtNodeArrayPos = childrenPos; - mDicNodeCodePoint = nodeCodePoint; + mPos = pos; + mChildrenPos = childrenPos; + mNodeCodePoint = nodeCodePoint; mProbability = probability; mIsTerminal = isTerminal; - mHasChildrenPtNodes = hasChildren; + mHasChildren = hasChildren; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; } // Init for copy - void init(const DicNodeProperties *const dicNodeProp) { - mPtNodePos = dicNodeProp->mPtNodePos; - mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; - mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; - mProbability = dicNodeProp->mProbability; - mIsTerminal = dicNodeProp->mIsTerminal; - mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; - mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; - mDepth = dicNodeProp->mDepth; - mLeavingDepth = dicNodeProp->mLeavingDepth; + void init(const DicNodeProperties *const nodeProp) { + mPos = nodeProp->mPos; + mChildrenPos 
= nodeProp->mChildrenPos; + mNodeCodePoint = nodeProp->mNodeCodePoint; + mProbability = nodeProp->mProbability; + mIsTerminal = nodeProp->mIsTerminal; + mHasChildren = nodeProp->mHasChildren; + mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; + mDepth = nodeProp->mDepth; + mLeavingDepth = nodeProp->mLeavingDepth; } // Init as passing child - void init(const DicNodeProperties *const dicNodeProp, const int codePoint) { - mPtNodePos = dicNodeProp->mPtNodePos; - mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; - mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mProbability = dicNodeProp->mProbability; - mIsTerminal = dicNodeProp->mIsTerminal; - mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; - mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; - mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child - mLeavingDepth = dicNodeProp->mLeavingDepth; + void init(const DicNodeProperties *const nodeProp, const int codePoint) { + mPos = nodeProp->mPos; + mChildrenPos = nodeProp->mChildrenPos; + mNodeCodePoint = codePoint; // Overwrite the node char of a passing child + mProbability = nodeProp->mProbability; + mIsTerminal = nodeProp->mIsTerminal; + mHasChildren = nodeProp->mHasChildren; + mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; + mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child + mLeavingDepth = nodeProp->mLeavingDepth; } - int getPtNodePos() const { - return mPtNodePos; + int getPos() const { + return mPos; } - int getChildrenPtNodeArrayPos() const { - return mChildrenPtNodeArrayPos; + int getChildrenPos() const { + return mChildrenPos; } int getProbability() const { return mProbability; } - int getDicNodeCodePoint() const { - return mDicNodeCodePoint; + int getNodeCodePoint() const { + return mNodeCodePoint; } uint16_t getDepth() const { @@ -106,7 +107,7 @@ class DicNodeProperties { } bool hasChildren() const { - return 
mHasChildrenPtNodes || mDepth != mLeavingDepth; + return mHasChildren || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { @@ -117,12 +118,12 @@ class DicNodeProperties { // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - int mPtNodePos; - int mChildrenPtNodeArrayPos; + int mPos; + int mChildrenPos; int mProbability; - int mDicNodeCodePoint; + int mNodeCodePoint; bool mIsTerminal; - bool mHasChildrenPtNodes; + bool mHasChildren; bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index dba57056b..b8986203d 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -30,7 +30,7 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { + mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); } @@ -41,7 +41,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordPtNodePos = NOT_A_DICT_POS; + mPrevWordNodePos = NOT_A_DICT_POS; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -50,7 +50,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordPtNodePos = prevWordNodePos; + mPrevWordNodePos = prevWordNodePos; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -60,7 +60,7 @@ class DicNodeStatePrevWord { mPrevWordCount = prevWord->mPrevWordCount; mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; - mPrevWordPtNodePos 
= prevWord->mPrevWordPtNodePos; + mPrevWordNodePos = prevWord->mPrevWordNodePos; mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); } @@ -71,7 +71,7 @@ class DicNodeStatePrevWord { const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; - mPrevWordPtNodePos = prevWordNodePos; + mPrevWordNodePos = prevWordNodePos; int twoWordsLen = DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord); if (twoWordsLen >= MAX_WORD_LENGTH) { @@ -116,8 +116,8 @@ class DicNodeStatePrevWord { return mPrevWordStart; } - int getPrevWordPtNodePos() const { - return mPrevWordPtNodePos; + int getPrevWordNodePos() const { + return mPrevWordNodePos; } int getPrevWordCodePointAt(const int id) const { @@ -147,7 +147,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordLength; int16_t mPrevWordStart; int16_t mPrevWordProbability; - int mPrevWordPtNodePos; + int mPrevWordNodePos; int mSecondWordFirstInputIndex; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index 74f9eee92..3c85d0e9d 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -21,7 +21,6 @@ #include "defines.h" #include "suggest/core/dictionary/digraph_utils.h" -#include "suggest/core/dictionary/error_type_utils.h" namespace latinime { @@ -32,7 +31,7 @@ class DicNodeStateScoring { mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX), mEditCorrectionCount(0), mProximityCorrectionCount(0), mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f), - mRawLength(0.0f), mContainingErrorTypes(ErrorTypeUtils::NOT_AN_ERROR), + 
mRawLength(0.0f), mExactMatch(true), mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) { } @@ -48,7 +47,7 @@ class DicNodeStateScoring { mDoubleLetterLevel = NOT_A_DOUBLE_LETTER; mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX; mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING; - mContainingErrorTypes = ErrorTypeUtils::NOT_AN_ERROR; + mExactMatch = true; } AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) { @@ -60,21 +59,34 @@ class DicNodeStateScoring { mRawLength = scoring->mRawLength; mDoubleLetterLevel = scoring->mDoubleLetterLevel; mDigraphIndex = scoring->mDigraphIndex; - mContainingErrorTypes = scoring->mContainingErrorTypes; + mExactMatch = scoring->mExactMatch; mNormalizedCompoundDistanceAfterFirstWord = scoring->mNormalizedCompoundDistanceAfterFirstWord; } void addCost(const float spatialCost, const float languageCost, const bool doNormalization, - const int inputSize, const int totalInputIndex, - const ErrorTypeUtils::ErrorType errorType) { + const int inputSize, const int totalInputIndex, const ErrorType errorType) { addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex); - mContainingErrorTypes = mContainingErrorTypes | errorType; - if (ErrorTypeUtils::isEditCorrectionError(errorType)) { - ++mEditCorrectionCount; - } - if (ErrorTypeUtils::isProximityCorrectionError(errorType)) { - ++mProximityCorrectionCount; + switch (errorType) { + case ET_EDIT_CORRECTION: + ++mEditCorrectionCount; + mExactMatch = false; + break; + case ET_PROXIMITY_CORRECTION: + ++mProximityCorrectionCount; + mExactMatch = false; + break; + case ET_COMPLETION: + mExactMatch = false; + break; + case ET_NEW_WORD: + mExactMatch = false; + break; + case ET_INTENTIONAL_OMISSION: + mExactMatch = false; + break; + case ET_NOT_AN_ERROR: + break; } } @@ -170,7 +182,7 @@ class DicNodeStateScoring { } bool isExactMatch() const { - return ErrorTypeUtils::isExactMatch(mContainingErrorTypes); + return mExactMatch; 
} private: @@ -187,8 +199,7 @@ class DicNodeStateScoring { float mSpatialDistance; float mLanguageDistance; float mRawLength; - // All accumulated error types so far - ErrorTypeUtils::ErrorType mContainingErrorTypes; + bool mExactMatch; float mNormalizedCompoundDistanceAfterFirstWord; AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance, diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 2a62b555b..71f4ef6ea 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); @@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; - int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; @@ -163,8 +163,7 @@ int BigramDictionary::getBigramProbability(const int 
*word0, int length0, const mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPos - && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + if (bigramsIt.getBigramPos() == nextWordPos) { return mDictionaryStructurePolicy->getProbability( mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos), bigramsIt.getProbability()); diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h index 5f9700486..5205456a8 100644 --- a/native/jni/src/suggest/core/dictionary/bloom_filter.h +++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h @@ -50,8 +50,6 @@ class BloomFilter { } private: - DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter); - // Size, in bytes, of the bloom filter index for bigrams // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, // where k is the number of hash functions, n the number of bigrams, and m the number of diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index e68c0a6d8..59ead1894 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -21,39 +21,46 @@ #include <stdint.h> #include "defines.h" +#include "suggest/core/dictionary/bigram_dictionary.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" #include "utils/log_utils.h" -#include "utils/time_keeper.h" namespace latinime { const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; 
-Dictionary::Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr - &dictionaryStructureWithBufferPolicy) +Dictionary::Dictionary(JNIEnv *env, + DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), - mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy.get())), + mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { logDictionaryInfo(env); } +Dictionary::~Dictionary() { + delete mBigramDictionary; + delete mGestureSuggest; + delete mTypingSuggest; + delete mDictionaryStructureWithBufferPolicy; +} + int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const { - TimeKeeper::setCurrentTime(); int result = 0; if (suggestOptions->isGesture()) { DicTraverseSession::initSessionInstance( traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); - result = mGestureSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates, + result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); if (DEBUG_DICT) { @@ -63,7 +70,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } else { DicTraverseSession::initSessionInstance( 
traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions); - result = mTypingSuggest.get()->getSuggestions(proximityInfo, traverseSession, xcoordinates, + result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, frequencies, spaceIndices, outputTypes, outputAutoCommitFirstWordConfidence); @@ -76,15 +83,12 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const { - TimeKeeper::setCurrentTime(); if (length <= 0) return 0; - return mBigramDictionary.get()->getPredictions(word, length, outWords, frequencies, - outputTypes); + return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); } int Dictionary::getProbability(const int *word, int length) const { - TimeKeeper::setCurrentTime(); - int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; @@ -94,60 +98,39 @@ int Dictionary::getProbability(const int *word, int length) const { int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, int length1) const { - TimeKeeper::setCurrentTime(); - return mBigramDictionary.get()->getBigramProbability(word0, length0, word1, length1); + return mBigramDictionary->getBigramProbability(word0, length0, word1, length1); } -void Dictionary::addUnigramWord(const int *const word, const int length, const int probability, - const int *const shortcutTargetCodePoints, const int shortcutLength, - const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted, - const int timestamp) { - TimeKeeper::setCurrentTime(); - 
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability, - shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord, - isBlacklisted, timestamp); +void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) { + mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability); } void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp) { - TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy.get()->addBigramWords(word0, length0, word1, length1, - probability, timestamp); + const int length1, const int probability) { + mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1, + probability); } void Dictionary::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy.get()->removeBigramWords(word0, length0, word1, length1); + mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); } void Dictionary::flush(const char *const filePath) { - TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy.get()->flush(filePath); + mDictionaryStructureWithBufferPolicy->flush(filePath); } void Dictionary::flushWithGC(const char *const filePath) { - TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy.get()->flushWithGC(filePath); + mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); } bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { - TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy.get()->needsToRunGC(mindsBlockByGC); + return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); } -void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult, +void 
Dictionary::getProperty(const char *const query, char *const outResult, const int maxResultLength) { - TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy.get()->getProperty(query, queryLength, outResult, - maxResultLength); -} - -const UnigramProperty Dictionary::getUnigramProperty(const int *const codePoints, - const int codePointCount) { - TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy.get()->getUnigramProperty( - codePoints, codePointCount); + return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index b37b4aa18..0195d5bf0 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -21,20 +21,15 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dictionary/bigram_dictionary.h" -#include "suggest/core/dictionary/unigram_property.h" -#include "suggest/core/policy/dictionary_header_structure_policy.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/core/suggest_interface.h" -#include "utils/exclusive_ownership_pointer.h" namespace latinime { +class BigramDictionary; class DictionaryStructureWithBufferPolicy; class DicTraverseSession; class ProximityInfo; +class SuggestInterface; class SuggestOptions; -class UnigramProperty; class Dictionary { public: @@ -58,8 +53,8 @@ class Dictionary { static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000; static const int KIND_FLAG_EXACT_MATCH = 0x40000000; - Dictionary(JNIEnv *env, const DictionaryStructureWithBufferPolicy::StructurePolicyPtr - &dictionaryStructureWithBufferPolicy); + Dictionary(JNIEnv *env, + DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPoilcy); int getSuggestions(ProximityInfo 
*proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, @@ -74,13 +69,10 @@ class Dictionary { int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; - void addUnigramWord(const int *const word, const int length, const int probability, - const int *const shortcutTargetCodePoints, const int shortcutLength, - const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted, - const int timestamp); + void addUnigramWord(const int *const word, const int length, const int probability); void addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp); + const int length1, const int probability); void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); @@ -91,33 +83,24 @@ class Dictionary { bool needsToRunGC(const bool mindsBlockByGC); - void getProperty(const char *const query, const int queryLength, char *const outResult, + void getProperty(const char *const query, char *const outResult, const int maxResultLength); - const UnigramProperty getUnigramProperty(const int *const codePoints, const int codePointCount); - const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { - return mDictionaryStructureWithBufferPolicy.get(); + return mDictionaryStructureWithBufferPolicy; } - int getFormatVersionNumber() const { - return mDictionaryStructureWithBufferPolicy.get()->getHeaderStructurePolicy() - ->getFormatVersionNumber(); - } + virtual ~Dictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); - typedef ExclusiveOwnershipPointer<BigramDictionary> BigramDictionaryPtr; - typedef ExclusiveOwnershipPointer<SuggestInterface> SuggestInterfacePtr; - static const int HEADER_ATTRIBUTE_BUFFER_SIZE; - const DictionaryStructureWithBufferPolicy::StructurePolicyPtr - 
mDictionaryStructureWithBufferPolicy; - const BigramDictionaryPtr mBigramDictionary; - const SuggestInterfacePtr mGestureSuggest; - const SuggestInterfacePtr mTypingSuggest; + DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; + const BigramDictionary *const mBigramDictionary; + const SuggestInterface *const mGestureSuggest; + const SuggestInterface *const mTypingSuggest; void logDictionaryInfo(JNIEnv *const env) const; }; diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp index 5f9b8f3e2..3271c1bfb 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp @@ -28,8 +28,11 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS +const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = + { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE + { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = - { DIGRAPH_TYPE_GERMAN_UMLAUT }; + { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; /* static */ bool DigraphUtils::hasDigraphForCodePoint( const DictionaryHeaderStructurePolicy *const headerPolicy, @@ -47,6 +50,9 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = if (headerPolicy->requiresGermanUmlautProcessing()) { return DIGRAPH_TYPE_GERMAN_UMLAUT; } + if (headerPolicy->requiresFrenchLigatureProcessing()) { + return DIGRAPH_TYPE_FRENCH_LIGATURES; + } return DIGRAPH_TYPE_NONE; } @@ -80,6 +86,10 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = *digraphs = GERMAN_UMLAUT_DIGRAPHS; return 
NELEMS(GERMAN_UMLAUT_DIGRAPHS); } + if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) { + *digraphs = FRENCH_LIGATURES_DIGRAPHS; + return NELEMS(FRENCH_LIGATURES_DIGRAPHS); + } return 0; } diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h index bec2cd6e2..6ae16e390 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.h +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h @@ -34,6 +34,7 @@ class DigraphUtils { typedef enum { DIGRAPH_TYPE_NONE, DIGRAPH_TYPE_GERMAN_UMLAUT, + DIGRAPH_TYPE_FRENCH_LIGATURES } DigraphType; typedef struct { int first; int second; int compositeGlyph; } digraph_t; @@ -54,6 +55,7 @@ class DigraphUtils { const DigraphType digraphType, const int compositeGlyphCodePoint); static const digraph_t GERMAN_UMLAUT_DIGRAPHS[]; + static const digraph_t FRENCH_LIGATURES_DIGRAPHS[]; static const DigraphType USED_DIGRAPH_TYPES[]; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp deleted file mode 100644 index 0635fef7e..000000000 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/core/dictionary/error_type_utils.h" - -namespace latinime { - -const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40; -const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80; - -const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = - NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH; - -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/error_type_utils.h b/native/jni/src/suggest/core/dictionary/error_type_utils.h deleted file mode 100644 index ab4a65e48..000000000 --- a/native/jni/src/suggest/core/dictionary/error_type_utils.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_ERROR_TYPE_UTILS_H -#define LATINIME_ERROR_TYPE_UTILS_H - -#include <stdint.h> - -#include "defines.h" - -namespace latinime { - -class ErrorTypeUtils { - public: - // ErrorType is mainly decided by CorrectionType but it is also depending on if - // the correction has really been performed or not. - typedef uint32_t ErrorType; - - static const ErrorType NOT_AN_ERROR; - static const ErrorType MATCH_WITH_CASE_ERROR; - static const ErrorType MATCH_WITH_ACCENT_ERROR; - static const ErrorType MATCH_WITH_DIGRAPH; - // Treat error as an intentional omission when the CorrectionType is omission and the node can - // be intentional omission. - static const ErrorType INTENTIONAL_OMISSION; - // Substitution, omission and transposition - static const ErrorType EDIT_CORRECTION; - // Proximity error - static const ErrorType PROXIMITY_CORRECTION; - // Completion - static const ErrorType COMPLETION; - // New word - // TODO: Remove. - // A new word error should be an edit correction error or a proximity correction error. - static const ErrorType NEW_WORD; - - // TODO: Differentiate errors. 
- static bool isExactMatch(const ErrorType containingErrors) { - return (containingErrors & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0; - } - - static bool isEditCorrectionError(const ErrorType errorType) { - return (errorType & EDIT_CORRECTION) != 0; - } - - static bool isProximityCorrectionError(const ErrorType errorType) { - return (errorType & PROXIMITY_CORRECTION) != 0; - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils); - - static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH; -}; -} // namespace latinime -#endif // LATINIME_ERROR_TYPE_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index 49d82e69a..b1d2f4b4d 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -30,75 +30,4 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25; // Most common previous word contexts currently have 100 bigrams const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100; -// Look up the bigram probability for the given word pair from the cached bigram maps. -// Also caches the bigrams if there is space remaining and they have not been cached already. 
-int MultiBigramMap::getBigramProbability( - const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int wordPosition, const int nextWordPosition, const int unigramProbability) { - hash_map_compat<int, BigramMap>::const_iterator mapPosition = - mBigramMaps.find(wordPosition); - if (mapPosition != mBigramMaps.end()) { - return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, - unigramProbability); - } - if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { - addBigramsForWordPosition(structurePolicy, wordPosition); - return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, - nextWordPosition, unigramProbability); - } - return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, - nextWordPosition, unigramProbability); -} - -void MultiBigramMap::BigramMap::init( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); - mBloomFilter.setInFilter(bigramsIt.getBigramPos()); - } -} - -int MultiBigramMap::BigramMap::getBigramProbability( - const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nextWordPosition, const int unigramProbability) const { - int bigramProbability = NOT_A_PROBABILITY; - if (mBloomFilter.isInFilter(nextWordPosition)) { - const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = - mBigramMap.find(nextWordPosition); - if (bigramProbabilityIt != mBigramMap.end()) { - bigramProbability = bigramProbabilityIt->second; - } - } - return structurePolicy->getProbability(unigramProbability, bigramProbability); -} - -void 
MultiBigramMap::addBigramsForWordPosition( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { - mBigramMaps[position].init(structurePolicy, position); -} - -int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, - const int nextWordPosition, const int unigramProbability) { - int bigramProbability = NOT_A_PROBABILITY; - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPosition) { - bigramProbability = bigramsIt.getProbability(); - break; - } - } - return structurePolicy->getProbability(unigramProbability, bigramProbability); -} - } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 421b2681c..4633c07b0 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -38,7 +38,21 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. 
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int wordPosition, const int nextWordPosition, const int unigramProbability); + const int wordPosition, const int nextWordPosition, const int unigramProbability) { + hash_map_compat<int, BigramMap>::const_iterator mapPosition = + mBigramMaps.find(wordPosition); + if (mapPosition != mBigramMaps.end()) { + return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, + unigramProbability); + } + if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { + addBigramsForWordPosition(structurePolicy, wordPosition); + return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, + nextWordPosition, unigramProbability); + } + return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, + nextWordPosition, unigramProbability); + } void clear() { mBigramMaps.clear(); @@ -53,11 +67,33 @@ class MultiBigramMap { ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nodePos); + const int nodePos) { + const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { + continue; + } + mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); + mBloomFilter.setInFilter(bigramsIt.getBigramPos()); + } + } - int getBigramProbability( + AK_FORCE_INLINE int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nextWordPosition, const int unigramProbability) const; + const int nextWordPosition, const int unigramProbability) const { + int bigramProbability = NOT_A_PROBABILITY; + if (mBloomFilter.isInFilter(nextWordPosition)) { + const hash_map_compat<int, int>::const_iterator bigramProbabilityIt = + 
mBigramMap.find(nextWordPosition); + if (bigramProbabilityIt != mBigramMap.end()) { + bigramProbability = bigramProbabilityIt->second; + } + } + return structurePolicy->getProbability(unigramProbability, bigramProbability); + } private: // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default @@ -67,12 +103,27 @@ class MultiBigramMap { BloomFilter mBloomFilter; }; - void addBigramsForWordPosition( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position); + AK_FORCE_INLINE void addBigramsForWordPosition( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { + mBigramMaps[position].init(structurePolicy, position); + } - int readBigramProbabilityFromBinaryDictionary( + AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, - const int nextWordPosition, const int unigramProbability); + const int nextWordPosition, const int unigramProbability) { + int bigramProbability = NOT_A_PROBABILITY; + const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == nextWordPosition) { + bigramProbability = bigramsIt.getProbability(); + break; + } + } + return structurePolicy->getProbability(unigramProbability, bigramProbability); + } static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; hash_map_compat<int, BigramMap> mBigramMaps; diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.cpp b/native/jni/src/suggest/core/dictionary/unigram_property.cpp deleted file mode 100644 index 16bbb69d8..000000000 --- a/native/jni/src/suggest/core/dictionary/unigram_property.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * 
Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/core/dictionary/unigram_property.h" - -namespace latinime { - -void UnigramProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, - jbooleanArray outFlags, jintArray outProbability, jintArray outHistoricalInfo, - jobject outShortcutTargets, jobject outShortcutProbabilities) const { - env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePointCount, mCodePoints); - jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts}; - env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); - env->SetIntArrayRegion(outProbability, 0 /* start */, 1 /* len */, &mProbability); - int historicalInfo[] = {mTimestamp, mLevel, mCount}; - env->SetIntArrayRegion(outHistoricalInfo, 0 /* start */, NELEMS(historicalInfo), - historicalInfo); - - jclass integerClass = env->FindClass("java/lang/Integer"); - jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V"); - jclass arrayListClass = env->FindClass("java/util/ArrayList"); - jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); - const int shortcutTargetCount = mShortcutTargets.size(); - for (int i = 0; i < shortcutTargetCount; ++i) { - jintArray shortcutTargetCodePointArray = env->NewIntArray(mShortcutTargets[i].size()); - env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */, - mShortcutTargets[i].size(), 
&mShortcutTargets[i][0]); - env->CallVoidMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); - env->DeleteLocalRef(shortcutTargetCodePointArray); - jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, - mShortcutProbabilities[i]); - env->CallVoidMethod(outShortcutProbabilities, addMethodId, integerProbability); - env->DeleteLocalRef(integerProbability); - } - env->DeleteLocalRef(integerClass); - env->DeleteLocalRef(arrayListClass); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/unigram_property.h b/native/jni/src/suggest/core/dictionary/unigram_property.h deleted file mode 100644 index c4ebb86ab..000000000 --- a/native/jni/src/suggest/core/dictionary/unigram_property.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_UNIGRAM_PROPERTY_H -#define LATINIME_UNIGRAM_PROPERTY_H - -#include <cstring> -#include <vector> - -#include "defines.h" -#include "jni.h" - -namespace latinime { - -// This class is used for returning information belonging to a unigram to java side. -class UnigramProperty { - public: - // Invalid unigram. 
- UnigramProperty() - : mCodePoints(), mCodePointCount(0), mIsNotAWord(false), mIsBlacklisted(false), - mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY), - mTimestamp(0), mLevel(0), mCount(0), mShortcutTargets(), mShortcutProbabilities() {} - - UnigramProperty(const UnigramProperty &unigramProperty) - : mCodePoints(), mCodePointCount(unigramProperty.mCodePointCount), - mIsNotAWord(unigramProperty.mIsNotAWord), - mIsBlacklisted(unigramProperty.mIsBlacklisted), - mHasBigrams(unigramProperty.mHasBigrams), - mHasShortcuts(unigramProperty.mHasShortcuts), - mProbability(unigramProperty.mProbability), - mTimestamp(unigramProperty.mTimestamp), mLevel(unigramProperty.mLevel), - mCount(unigramProperty.mCount), mShortcutTargets(unigramProperty.mShortcutTargets), - mShortcutProbabilities(unigramProperty.mShortcutProbabilities) { - memcpy(mCodePoints, unigramProperty.mCodePoints, sizeof(mCodePoints)); - } - - UnigramProperty(const int *const codePoints, const int codePointCount, - const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams, - const bool hasShortcuts, const int probability, const int timestamp, - const int level, const int count, - const std::vector<std::vector<int> > *const shortcutTargets, - const std::vector<int> *const shortcutProbabilities) - : mCodePoints(), mCodePointCount(codePointCount), - mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mHasBigrams(hasBigrams), - mHasShortcuts(hasShortcuts), mProbability(probability), mTimestamp(timestamp), - mLevel(level), mCount(count), mShortcutTargets(*shortcutTargets), - mShortcutProbabilities(*shortcutProbabilities) { - memcpy(mCodePoints, codePoints, sizeof(mCodePoints)); - } - - void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, - jintArray outProbability, jintArray outHistoricalInfo, jobject outShortcutTargets, - jobject outShortcutProbabilities) const; - - private: - DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty); - - int 
mCodePoints[MAX_WORD_LENGTH]; - int mCodePointCount; - bool mIsNotAWord; - bool mIsBlacklisted; - bool mHasBigrams; - bool mHasShortcuts; - int mProbability; - // Historical information - int mTimestamp; - int mLevel; - int mCount; - // Shortcut - std::vector<std::vector<int> > mShortcutTargets; - std::vector<int> mShortcutProbabilities; -}; -} // namespace latinime -#endif // LATINIME_UNIGRAM_PROPERTY_H diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp index ee8e59ef9..e64476d82 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info.cpp @@ -71,7 +71,7 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, && sweetSpotCenterYs && sweetSpotRadii), mProximityCharsArray(new int[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE /* proximityCharsLength */]), - mLowerCodePointToKeyMap() { + mCodeToKeyMap() { /* Let's check the input array length here to make sure */ const jsize proximityCharsLength = env->GetArrayLength(proximityChars); if (proximityCharsLength != GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE) { @@ -147,14 +147,7 @@ int ProximityInfo::getCodePointOf(const int keyIndex) const { if (keyIndex < 0 || keyIndex >= KEY_COUNT) { return NOT_A_CODE_POINT; } - return mKeyIndexToLowerCodePointG[keyIndex]; -} - -int ProximityInfo::getOriginalCodePointOf(const int keyIndex) const { - if (keyIndex < 0 || keyIndex >= KEY_COUNT) { - return NOT_A_CODE_POINT; - } - return mKeyIndexToOriginalCodePoint[keyIndex]; + return mKeyIndexToCodePointG[keyIndex]; } void ProximityInfo::initializeG() { @@ -171,9 +164,8 @@ void ProximityInfo::initializeG() { const float gapY = sweetSpotCenterY - mCenterYsG[i]; mSweetSpotCenterYsG[i] = static_cast<int>(mCenterYsG[i] + gapY * verticalScale); } - mLowerCodePointToKeyMap[lowerCode] = i; - mKeyIndexToOriginalCodePoint[i] = code; - mKeyIndexToLowerCodePointG[i] = lowerCode; + 
mCodeToKeyMap[lowerCode] = i; + mKeyIndexToCodePointG[i] = lowerCode; } for (int i = 0; i < KEY_COUNT; i++) { mKeyKeyDistancesG[i][i] = 0; diff --git a/native/jni/src/suggest/core/layout/proximity_info.h b/native/jni/src/suggest/core/layout/proximity_info.h index a91b9d674..f25949001 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.h +++ b/native/jni/src/suggest/core/layout/proximity_info.h @@ -39,7 +39,6 @@ class ProximityInfo { float getNormalizedSquaredDistanceFromCenterFloatG( const int keyId, const int x, const int y, const bool isGeometric) const; int getCodePointOf(const int keyIndex) const; - int getOriginalCodePointOf(const int keyIndex) const; bool hasSweetSpotData(const int keyIndex) const { // When there are no calibration data for a key, // the radius of the key is assigned to zero. @@ -77,11 +76,11 @@ class ProximityInfo { ProximityInfoUtils::initializeProximities(inputCodes, inputXCoordinates, inputYCoordinates, inputSize, mKeyXCoordinates, mKeyYCoordinates, mKeyWidths, mKeyHeights, mProximityCharsArray, CELL_HEIGHT, CELL_WIDTH, GRID_WIDTH, MOST_COMMON_KEY_WIDTH, - KEY_COUNT, mLocaleStr, &mLowerCodePointToKeyMap, allInputCodes); + KEY_COUNT, mLocaleStr, &mCodeToKeyMap, allInputCodes); } AK_FORCE_INLINE int getKeyIndexOf(const int c) const { - return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mLowerCodePointToKeyMap); + return ProximityInfoUtils::getKeyIndexOf(KEY_COUNT, c, &mCodeToKeyMap); } AK_FORCE_INLINE bool isCodePointOnKeyboard(const int codePoint) const { @@ -118,9 +117,9 @@ class ProximityInfo { // Sweet spots for geometric input. Note that we have extra sweet spots only for Y coordinates. 
float mSweetSpotCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; - hash_map_compat<int, int> mLowerCodePointToKeyMap; - int mKeyIndexToOriginalCodePoint[MAX_KEY_COUNT_IN_A_KEYBOARD]; - int mKeyIndexToLowerCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; + hash_map_compat<int, int> mCodeToKeyMap; + + int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD]; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index bb4b41714..fbabd92f2 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -30,12 +30,6 @@ namespace latinime { -int ProximityInfoState::getPrimaryOriginalCodePointAt(const int index) const { - const int primaryCodePoint = getPrimaryCodePointAt(index); - const int keyIndex = mProximityInfo->getKeyIndexOf(primaryCodePoint); - return mProximityInfo->getOriginalCodePointOf(keyIndex); -} - // TODO: Remove the dependency of "isGeometric" void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index e941e43d8..c94060fa9 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -65,8 +65,6 @@ class ProximityInfoState { return getProximityCodePointsAt(index)[0]; } - int getPrimaryOriginalCodePointAt(const int index) const; - inline bool sameAsTyped(const int *word, int length) const { if (length != mSampledInputSize) { return false; diff --git 
a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h index b76b13971..5492c6070 100644 --- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -29,10 +29,12 @@ class DictionaryHeaderStructurePolicy { public: virtual ~DictionaryHeaderStructurePolicy() {} - virtual int getFormatVersionNumber() const = 0; + virtual bool supportsDynamicUpdate() const = 0; virtual bool requiresGermanUmlautProcessing() const = 0; + virtual bool requiresFrenchLigatureProcessing() const = 0; + virtual float getMultiWordCostMultiplier() const = 0; virtual int getLastDecayedTime() const = 0; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index c74a4ebbe..41f82049f 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -18,8 +18,6 @@ #define LATINIME_DICTIONARY_STRUCTURE_POLICY_H #include "defines.h" -#include "suggest/core/dictionary/unigram_property.h" -#include "utils/exclusive_ownership_pointer.h" namespace latinime { @@ -30,25 +28,23 @@ class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; /* - * This class abstracts the structure of dictionaries. + * This class abstracts structure of dictionaries. * Implement this policy to support additional dictionaries. 
*/ class DictionaryStructureWithBufferPolicy { public: - typedef ExclusiveOwnershipPointer<DictionaryStructureWithBufferPolicy> StructurePolicyPtr; - virtual ~DictionaryStructureWithBufferPolicy() {} virtual int getRootPosition() const = 0; - virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode, + virtual void createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalPtNodePositionOfWord(const int *const inWord, + virtual int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; virtual int getProbability(const int unigramProbability, @@ -68,13 +64,11 @@ class DictionaryStructureWithBufferPolicy { // Returns whether the update was success or not. virtual bool addUnigramWord(const int *const word, const int length, - const int probability, const int *const shortcutTargetCodePoints, - const int shortcutLength, const int shortcutProbability, const bool isNotAWord, - const bool isBlacklisted,const int timestamp) = 0; + const int probability) = 0; // Returns whether the update was success or not. virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp) = 0; + const int length1, const int probability) = 0; // Returns whether the update was success or not. virtual bool removeBigramWords(const int *const word0, const int length0, @@ -88,13 +82,9 @@ class DictionaryStructureWithBufferPolicy { // Currently, this method is used only for testing. You may want to consider creating new // dedicated method instead of this if you want to use this in the production. 
- virtual void getProperty(const char *const query, const int queryLength, char *const outResult, + virtual void getProperty(const char *const query, char *const outResult, const int maxResultLength) = 0; - // Used for testing. - virtual const UnigramProperty getUnigramProperty(const int *const codePonts, - const int codePointCount) const = 0; - protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index c202b81fe..0c4016893 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -20,7 +20,6 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_profiler.h" #include "suggest/core/dicnode/dic_node_utils.h" -#include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/session/dic_traverse_session.h" namespace latinime { @@ -83,8 +82,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n traverseSession, parentDicNode, dicNode, &inputStateG); const float languageCost = Weighting::getLanguageCost(weighting, correctionType, traverseSession, parentDicNode, dicNode, multiBigramMap); - const ErrorTypeUtils::ErrorType errorType = weighting->getErrorType(correctionType, - traverseSession, parentDicNode, dicNode); + const ErrorType errorType = weighting->getErrorType(correctionType, traverseSession, + parentDicNode, dicNode); profile(correctionType, dicNode); if (inputStateG.mNeedsToUpdateInputStateG) { dicNode->updateInputIndexG(&inputStateG); diff --git a/native/jni/src/suggest/core/policy/weighting.h b/native/jni/src/suggest/core/policy/weighting.h index bd6b3cf41..2d49e98a6 100644 --- a/native/jni/src/suggest/core/policy/weighting.h +++ b/native/jni/src/suggest/core/policy/weighting.h @@ -18,7 +18,6 @@ #define LATINIME_WEIGHTING_H #include "defines.h" -#include "suggest/core/dictionary/error_type_utils.h" namespace latinime 
{ @@ -85,7 +84,7 @@ class Weighting { virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const = 0; - virtual ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType, + virtual ErrorType getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 5070491f4..50f2bbd8d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPtNodePos = NOT_A_DICT_POS; + mPrevWordPos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPtNodePos == NOT_A_DICT_POS) { + if (mPrevWordPos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". 
- mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 6e4dda44d..e0b1c67d9 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -59,7 +59,7 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0), + : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { @@ -86,9 +86,11 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; } + int getPrevWordPos() const { return mPrevWordPos; } // TODO: REMOVE - void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; } + void setPrevWordPos(int pos) { mPrevWordPos = pos; } + // TODO: Use proper parameter when changed + int getDicRootPos() const { return 0; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -117,13 +119,26 @@ class DicTraverseSession { return true; } - ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const { + void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const { + for 
(int i = 0; i < MAX_POINTER_COUNT_G; ++i) { + if (!mProximityInfoStates[i].isUsed()) { + continue; + } + const int pointerId = node->getInputIndex(i); + const std::vector<int> *const searchKeyVector = + mProximityInfoStates[i].getSearchKeyVector(pointerId); + outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(), + searchKeyVector->end()); + } + } + + ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const { ProximityType proximityType = UNRELATED_CHAR; for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { if (!mProximityInfoStates[i].isUsed()) { continue; } - const int pointerId = dicNode->getInputIndex(i); + const int pointerId = node->getInputIndex(i); proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint); ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR); // TODO: Make this more generic @@ -177,7 +192,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordPtNodePos; + int mPrevWordPos; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index f84c84181..73ccebc88 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Continue suggestion after partial commit. 
DicNode *topDicNode = traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); - traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos()); + traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); traverseSession->getDicTraverseCache()->continueSearch(); traverseSession->setPartiallyCommited(); } @@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), - traverseSession->getPrevWordPtNodePos(), &rootNode); + traverseSession->getPrevWordPos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } } @@ -231,15 +231,12 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen BinaryDictionaryShortcutIterator shortcutIt( traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), traverseSession->getDictionaryStructurePolicy() - ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos())); + ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); - const int shortcutBaseScore = SCORING->doesAutoCorrectValidWord() ? 
- SCORING->calculateFinalScore(compoundDistance, traverseSession->getInputSize(), - true /* forceCommit */) : finalScore; const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, - outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes, + outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex( traverseSession->getProximityInfoState(0)); @@ -424,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } break; case UNRELATED_CHAR: - // Just drop this dicNode and do nothing. + // Just drop this node and do nothing. break; default: - // Just drop this dicNode and do nothing. + // Just drop this node and do nothing. break; } } - // Push the dicNode for look-ahead correction + // Push the node for look-ahead correction if (allowsErrorCorrections && canDoLookAheadCorrection) { traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); } @@ -445,7 +442,7 @@ void Suggest::processTerminalDicNode( if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { return; } - if (!dicNode->isTerminalDicNode()) { + if (!dicNode->isTerminalWordNode()) { return; } if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { @@ -466,7 +463,7 @@ void Suggest::processTerminalDicNode( /** * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word - * (by the space omission error correction) search path if input dicNode is on a terminal. + * (by the space omission error correction) search path if input dicNode is on a terminal node. 
*/ void Suggest::processExpandedDicNode( DicTraverseSession *traverseSession, DicNode *dicNode) const { @@ -508,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, processExpandedDicNode(traverseSession, childDicNode); } -// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German +// Process the node codepoint as a digraph. This means that composite glyphs like the German // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, @@ -521,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, /** * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider * matches for all possible next letters. Note that just skipping the current letter without any - * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check + * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check * the possible *next* letters after the omission to better limit search to plausible omissions. * Note that apostrophes are handled as omissions. 
*/ @@ -608,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, } /** - * Weight child dicNode by aligning it to the key + * Weight child node by aligning it to the key */ void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int inputSize = traverseSession->getInputSize(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index 7d0d09631..1926b9831 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -37,6 +38,7 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI // Mask for attribute probability, stored on 4 bits inside the flags byte. 
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; +const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; /* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags, @@ -77,6 +79,11 @@ const BigramListReadWriteUtils::BigramFlags offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos); break; } + if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) { + return NOT_A_DICT_POS; + } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) { + return origin; + } if (isOffsetNegative(flags)) { return origin - offset; } else { @@ -84,4 +91,92 @@ const BigramListReadWriteUtils::BigramFlags } } +/* static */ bool BigramListReadWriteUtils::setHasNextFlag( + BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) { + const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos); + int readingPos = entryPos; + if (usesAdditionalBuffer) { + readingPos -= buffer->getOriginalBufferSize(); + } + BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition( + buffer->getBuffer(usesAdditionalBuffer), &readingPos); + if (hasNext) { + bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT; + } else { + bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT); + } + int writingPos = entryPos; + return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos); +} + +/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry( + BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability, + const bool hasNext, int *const writingPos) { + BigramFlags flags; + if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) { + return false; + } + return writeBigramEntry(buffer, flags, targetPos, writingPos); +} + +/* static */ bool 
BigramListReadWriteUtils::writeBigramEntry( + BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags, + const int targetPtNodePos, int *const writingPos) { + const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos); + const BigramFlags flagsToWrite = (offset < 0) ? + (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE); + if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) { + return false; + } + const uint32_t absOffest = abs(offset); + const int bigramTargetFieldSize = attributeAddressSize(flags); + return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize, + writingPos); +} + +// Returns true if the bigram entry is valid and put entry flags into out*. +/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos, + const int targetPtNodePos, const int probability, const bool hasNext, + BigramFlags *const outBigramFlags) { + BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY; + if (hasNext) { + flags |= FLAG_ATTRIBUTE_HAS_NEXT; + } + const int offset = getBigramTargetOffset(targetPtNodePos, entryPos); + if (offset < 0) { + flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE; + } + const uint32_t absOffest = abs(offset); + if ((absOffest >> 24) != 0) { + // Offset is too large. + return false; + } else if ((absOffest >> 16) != 0) { + flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; + } else if ((absOffest >> 8) != 0) { + flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; + } else { + flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; + } + // Currently, all newly written bigram position fields are 3 bytes to simplify dictionary + // writing. + // TODO: Remove following 2 lines and optimize memory space. 
+ flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; + *outBigramFlags = flags; + return true; +} + +/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos, + const int entryPos) { + if (targetPtNodePos == NOT_A_DICT_POS) { + return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID; + } else { + const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */); + if (offset == 0) { + return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET; + } else { + return offset; + } + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index 7e1038300..eabe4e099 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -45,6 +45,34 @@ public: // Bigrams reading methods static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos); + // Returns the size of the bigram position field that is stored in bigram flags. 
+ static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) { + return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT; + /* Note: this is a value-dependant optimization of what may probably be + more readably written this way: + switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) { + case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1; + case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2; + case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3; + default: return 0; + } + */ + } + + static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer, + const bool hasNext, const int entryPos); + + static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags, + const int probability) { + return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY); + } + + static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer, + const int targetPos, const int probability, const bool hasNext, int *const writingPos); + + static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags, + const int targetOffset, int *const writingPos); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils); @@ -55,6 +83,11 @@ private: static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT; static const BigramFlags MASK_ATTRIBUTE_PROBABILITY; + static const int ATTRIBUTE_ADDRESS_SHIFT; + + // Returns true if the bigram entry is valid and put entry flags into out*. 
+ static bool createAndGetBigramFlags(const int entryPos, const int targetPos, + const int probability, const bool hasNext, BigramFlags *const outBigramFlags); static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) { return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; @@ -62,6 +95,8 @@ private: static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos); + + static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos); }; } // namespace latinime #endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp new file mode 100644 index 000000000..b1170e251 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -0,0 +1,391 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" + +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { + +const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000; +const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000; + +void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *bigramEntryPos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::BigramFlags bigramFlags; + int originalBigramPos; + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags, + &originalBigramPos, bigramEntryPos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); + *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); + if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) { + // This bigram is too weak to output. 
+ *outBigramPos = NOT_A_DICT_POS; + } else { + *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + } + if (usesAdditionalBuffer) { + *bigramEntryPos += mBuffer->getOriginalBufferSize(); + } +} + +void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *bigramListPos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos); + if (usesAdditionalBuffer) { + *bigramListPos += mBuffer->getOriginalBufferSize(); + } +} + +bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, + int *const fromPos, int *const toPos, int *const outBigramsCount) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); + if (usesAdditionalBuffer) { + *fromPos -= mBuffer->getOriginalBufferSize(); + } + *outBigramsCount = 0; + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + int lastWrittenEntryPos = NOT_A_DICT_POS; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + // The buffer address can be changed after calling buffer writing methods. + int originalBigramPos; + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, + fromPos); + if (originalBigramPos == NOT_A_DICT_POS) { + // skip invalid bigram entry. 
+ continue; + } + if (usesAdditionalBuffer) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + if (bigramPos == NOT_A_DICT_POS) { + // Target PtNode has been invalidated. + continue; + } + lastWrittenEntryPos = *toPos; + if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos, + BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), + BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) { + return false; + } + (*outBigramsCount)++; + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + // Makes the last entry the terminal of the list. Updates the flags. + if (lastWrittenEntryPos != NOT_A_DICT_POS) { + if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */, + lastWrittenEntryPos)) { + return false; + } + } + if (usesAdditionalBuffer) { + *fromPos += mBuffer->getOriginalBufferSize(); + } + return true; +} + +// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode +// has been deleted or is not a valid terminal. +bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( + int *const bigramListPos, int *const outValidBigramEntryCount) { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); + if (usesAdditionalBuffer) { + *bigramListPos -= mBuffer->getOriginalBufferSize(); + } + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. 
Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + int bigramEntryPos = *bigramListPos; + int originalBigramPos; + // The buffer address can be changed after calling buffer writing methods. + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, + bigramListPos); + if (usesAdditionalBuffer) { + bigramEntryPos += mBuffer->getOriginalBufferSize(); + } + if (originalBigramPos == NOT_A_DICT_POS) { + // This entry has already been removed. + continue; + } + if (usesAdditionalBuffer) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + const int bigramTargetNodePos = + followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); + if (nodeReader.isDeleted() || !nodeReader.isTerminal() + || bigramTargetNodePos == NOT_A_DICT_POS) { + // The target is no longer valid terminal. Invalidate the current bigram entry. + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) { + return false; + } + continue; + } + bool isRemoved = false; + if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos, + &isRemoved)) { + return false; + } + if (!isRemoved) { + (*outValidBigramEntryCount) += 1; + } + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + return true; +} + +// Updates bigram target PtNode positions in the list after the placing step in GC. 
+bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos, + const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const + ptNodePositionRelocationMap, int *const outBigramEntryCount) { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); + if (usesAdditionalBuffer) { + *bigramListPos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + int bigramEntryPos = *bigramListPos; + if (usesAdditionalBuffer) { + bigramEntryPos += mBuffer->getOriginalBufferSize(); + } + int bigramTargetPtNodePos; + // The buffer address can be changed after calling buffer writing methods. + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos, + bigramListPos); + if (bigramTargetPtNodePos == NOT_A_DICT_POS) { + continue; + } + if (usesAdditionalBuffer) { + bigramTargetPtNodePos += mBuffer->getOriginalBufferSize(); + } + + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = + ptNodePositionRelocationMap->find(bigramTargetPtNodePos); + if (it != ptNodePositionRelocationMap->end()) { + bigramTargetPtNodePos = it->second; + } else { + bigramTargetPtNodePos = NOT_A_DICT_POS; + } + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + bigramTargetPtNodePos, &bigramEntryPos)) { + return false; + } + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + (*outBigramEntryCount) = bigramEntryCount; + return true; +} + +bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos, + const int probability, int *const bigramListPos, bool 
*const outAddedNewBigram) { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); + if (usesAdditionalBuffer) { + *bigramListPos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + int entryPos = *bigramListPos; + if (usesAdditionalBuffer) { + entryPos += mBuffer->getOriginalBufferSize(); + } + int originalBigramPos; + // The buffer address can be changed after calling buffer writing methods. + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, + bigramListPos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) { + // Update this bigram entry. + *outAddedNewBigram = false; + const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( + bigramFlags); + const int probabilityToWrite = mIsDecayingDict ? + ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + probability) : probability; + const BigramListReadWriteUtils::BigramFlags updatedFlags = + BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, + probabilityToWrite); + return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, + originalBigramPos, &entryPos); + } + if (BigramListReadWriteUtils::hasNext(bigramFlags)) { + continue; + } + // The current last entry is found. + // First, update the flags of the last entry. 
+ if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) { + *outAddedNewBigram = false; + return false; + } + if (usesAdditionalBuffer) { + *bigramListPos += mBuffer->getOriginalBufferSize(); + } + // Then, add a new entry after the last entry. + *outAddedNewBigram = true; + return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos); + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + // We return directly from the while loop. + ASSERT(false); + return false; +} + +bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability, + int *const writingPos) { + // hasNext is false because we are adding a new bigram entry at the end of the bigram list. + const int probabilityToWrite = mIsDecayingDict ? + ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) : + probability; + return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, + probabilityToWrite, false /* hasNext */, writingPos); +} + +bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos); + int pos = bigramListPos; + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + int bigramEntryPos = pos; + int originalBigramPos; + // The buffer address can be changed after calling buffer writing methods. 
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos); + if (usesAdditionalBuffer) { + bigramEntryPos += mBuffer->getOriginalBufferSize(); + } + if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + if (bigramPos != bigramTargetPos) { + continue; + } + // Target entry is found. Write an invalid target position to mark the bigram invalid. + return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos); + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + return false; +} + +int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( + const int originalBigramPos) const { + if (originalBigramPos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + int currentPos = originalBigramPos; + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); + int bigramLinkCount = 0; + while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { + currentPos = nodeReader.getBigramLinkedNodePos(); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); + bigramLinkCount++; + if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { + AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); + ASSERT(false); + return NOT_A_DICT_POS; + } + } + return currentPos; +} + +bool DynamicBigramListPolicy::updateProbabilityForDecay( + const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, + int *const bigramEntryPos, bool *const outRemoved) const { + *outRemoved = false; + if (mIsDecayingDict) { + // Update bigram probability for decaying. 
+ const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( + BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy); + if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { + // Write new probability. + const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = + BigramListReadWriteUtils::setProbabilityInFlags( + bigramFlags, newProbability); + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags, + targetPtNodePos, bigramEntryPos)) { + return false; + } + } else { + // Remove current bigram entry. + *outRemoved = true; + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) { + return false; + } + } + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h new file mode 100644 index 000000000..0504b59d5 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H +#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DictionaryHeaderStructurePolicy; +class DictionaryShortcutsStructurePolicy; + +/* + * This is a dynamic version of BigramListPolicy and supports an additional buffer. + */ +class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy, + BufferWithExtendableBuffer *const buffer, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const bool isDecayingDict) + : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy), + mIsDecayingDict(isDecayingDict) {} + + ~DynamicBigramListPolicy() {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, + int *const bigramEntryPos) const; + + void skipAllBigrams(int *const bigramListPos) const; + + // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in + // bufferToWrite and advance these positions after bigram lists. This method skips invalid + // bigram entries and write the valid bigram entry count to outBigramsCount. 
+ bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos, + int *const toPos, int *const outBigramsCount) const; + + bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos, + int *const outBigramEntryCount); + + bool updateAllBigramTargetPtNodePositions(int *const bigramListPos, + const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const + ptNodePositionRelocationMap, int *const outValidBigramEntryCount); + + bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, + int *const bigramListPos, bool *const outAddedNewBigram); + + bool writeNewBigramEntry(const int bigramTargetPos, const int probability, + int *const writingPos); + + // Return whether or not targetBigramPos is found. + bool removeBigram(const int bigramListPos, const int bigramTargetPos); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); + + static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT; + static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT; + + const DictionaryHeaderStructurePolicy *const mHeaderPolicy; + BufferWithExtendableBuffer *const mBuffer; + const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; + const bool mIsDecayingDict; + + // Follow bigram link and return the position of bigram target PtNode that is currently valid. 
+ int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; + + bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags, + const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const; +}; +} // namespace latinime +#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp deleted file mode 100644 index cd2243025..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" - -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" - -namespace latinime { - -void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, - bool *const outHasNext, int *const bigramEntryPos) const { - const BigramEntry bigramEntry = - mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); - if (outBigramPos) { - // Lookup target PtNode position. - *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition( - bigramEntry.getTargetTerminalId()); - } - if (outProbability) { - if (bigramEntry.hasHistoricalInfo()) { - *outProbability = - ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()); - } else { - *outProbability = bigramEntry.getProbability(); - } - } - if (outHasNext) { - *outHasNext = bigramEntry.hasNext(); - } -} - -bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, - const int newProbability, const int timestamp, bool *const outAddedNewEntry) { - if (outAddedNewEntry) { - *outAddedNewEntry = false; - } - const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (bigramListPos == NOT_A_DICT_POS) { - // Updating PtNode doesn't have a bigram list. - // Create new bigram list. 
- if (!mBigramDictContent->createNewBigramList(terminalId)) { - return false; - } - const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, - newTargetTerminalId); - const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, - newProbability, timestamp); - // Write an entry. - const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { - return false; - } - if (outAddedNewEntry) { - *outAddedNewEntry = true; - } - return true; - } - - const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); - if (entryPosToUpdate != NOT_A_DICT_POS) { - // Overwrite existing entry. - const BigramEntry originalBigramEntry = - mBigramDictContent->getBigramEntry(entryPosToUpdate); - if (!originalBigramEntry.isValid()) { - // Reuse invalid entry. - if (outAddedNewEntry) { - *outAddedNewEntry = true; - } - } - const BigramEntry updatedBigramEntry = - originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); - const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &updatedBigramEntry, newProbability, timestamp); - return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); - } - - // Add new entry to the bigram list. - // Create new bigram list. - if (!mBigramDictContent->createNewBigramList(terminalId)) { - return false; - } - // Write new entry at a head position of the bigram list. 
- int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); - const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &newBigramEntry, newProbability, timestamp); - if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) { - return false; - } - if (outAddedNewEntry) { - *outAddedNewEntry = true; - } - // Append existing entries by copying. - return mBigramDictContent->copyBigramList(bigramListPos, writingPos); -} - -bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { - const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (bigramListPos == NOT_A_DICT_POS) { - // Bigram list doesn't exist. - return false; - } - const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); - if (entryPosToUpdate == NOT_A_DICT_POS) { - // Bigram entry doesn't exist. - return false; - } - const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); - if (targetTerminalId != bigramEntry.getTargetTerminalId()) { - // Bigram entry doesn't exist. - return false; - } - // Remove bigram entry by marking it as invalid entry and overwriting the original entry. - const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); - return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate); -} - -bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, - int *const outBigramCount) { - const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (bigramListPos == NOT_A_DICT_POS) { - // Bigram list doesn't exist. 
- return true; - } - bool hasNext = true; - int readingPos = bigramListPos; - while (hasNext) { - const int entryPos = readingPos; - const BigramEntry bigramEntry = - mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); - if (!bigramEntry.isValid()) { - continue; - } - const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( - bigramEntry.getTargetTerminalId()); - if (targetPtNodePos == NOT_A_DICT_POS) { - // Invalidate bigram entry. - const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); - if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { - return false; - } - } else if (bigramEntry.hasHistoricalInfo()) { - const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( - bigramEntry.getHistoricalInfo()); - if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) { - const BigramEntry updatedBigramEntry = - bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); - if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { - return false; - } - *outBigramCount += 1; - } else { - // Remove entry. - const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); - if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { - return false; - } - } - } else { - *outBigramCount += 1; - } - } - return true; -} - -int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { - const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (bigramListPos == NOT_A_DICT_POS) { - // Bigram list doesn't exist. 
- return 0; - } - int bigramCount = 0; - bool hasNext = true; - int readingPos = bigramListPos; - while (hasNext) { - const BigramEntry bigramEntry = - mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); - if (bigramEntry.isValid()) { - bigramCount++; - } - } - return bigramCount; -} - -int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, - const int bigramListPos) const { - bool hasNext = true; - int invalidEntryPos = NOT_A_DICT_POS; - int readingPos = bigramListPos; - while (hasNext) { - const int entryPos = readingPos; - const BigramEntry bigramEntry = - mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); - if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { - // Entry with same target is found. - return entryPos; - } else if (!bigramEntry.isValid()) { - // Invalid entry that can be reused is found. - invalidEntryPos = entryPos; - } - } - return invalidEntryPos; -} - -const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( - const BigramEntry *const originalBigramEntry, const int newProbability, - const int timestamp) const { - // TODO: Consolidate historical info and probability. 
- if (mHeaderPolicy->hasHistoricalInfoOfWords()) { - const HistoricalInfo updatedHistoricalInfo = - ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalBigramEntry->getHistoricalInfo(), newProbability, timestamp); - return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); - } else { - return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); - } -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h deleted file mode 100644 index 5b6c5a173..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H -#define LATINIME_VER4_BIGRAM_LIST_POLICY_H - -#include "defines.h" -#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" - -namespace latinime { - -class BigramDictContent; -class HeaderPolicy; -class TerminalPositionLookupTable; - -class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { - public: - Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, - const TerminalPositionLookupTable *const terminalPositionLookupTable, - const HeaderPolicy *const headerPolicy) - : mBigramDictContent(bigramDictContent), - mTerminalPositionLookupTable(terminalPositionLookupTable), - mHeaderPolicy(headerPolicy) {} - - void getNextBigram(int *const outBigramPos, int *const outProbability, - bool *const outHasNext, int *const bigramEntryPos) const; - - void skipAllBigrams(int *const pos) const { - // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. 
- } - - bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, - const int timestamp, bool *const outAddedNewEntry); - - bool removeEntry(const int terminalId, const int targetTerminalId); - - bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, - int *const outBigramCount); - - int getBigramEntryConut(const int terminalId); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); - - int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; - - const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, - const int newProbability, const int timestamp) const; - - BigramDictContent *const mBigramDictContent; - const TerminalPositionLookupTable *const mTerminalPositionLookupTable; - const HeaderPolicy *const mHeaderPolicy; -}; -} // namespace latinime -#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp new file mode 100644 index 000000000..ff80dd2f6 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { + +/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset, + const int size, const bool isUpdatable) { + // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of + // impl classes of DictionaryStructureWithBufferPolicy. + const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size, + isUpdatable); + if (!mmapedBuffer) { + return 0; + } + switch (FormatUtils::detectFormatVersion(mmapedBuffer->getBuffer(), + mmapedBuffer->getBufferSize())) { + case FormatUtils::VERSION_2: + return new PatriciaTriePolicy(mmapedBuffer); + case FormatUtils::VERSION_3: + return new DynamicPatriciaTriePolicy(mmapedBuffer); + default: + AKLOGE("DICT: dictionary format is unknown, bad magic number"); + delete mmapedBuffer; + ASSERT(false); + return 0; + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h index 45ab52931..8cebc3b16 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -21,27 +21,16 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include 
"utils/exclusive_ownership_pointer.h" namespace latinime { class DictionaryStructureWithBufferPolicyFactory { public: - static DictionaryStructureWithBufferPolicy::StructurePolicyPtr - newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset, - const int size, const bool isUpdatable); + static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy( + const char *const path, const int bufOffset, const int size, const bool isUpdatable); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); - - static DictionaryStructureWithBufferPolicy::StructurePolicyPtr - newPolicyforDirectoryDict(const char *const path, const bool isUpdatable); - - static DictionaryStructureWithBufferPolicy::StructurePolicyPtr - newPolicyforFileDict(const char *const path, const int bufOffset, const int size); - - static void getHeaderFilePathInDictDir(const char *const dirPath, - const int outHeaderFileBufSize, char *const outHeaderFilePath); }; } // namespace latinime #endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp new file mode 100644 index 000000000..5724c5d88 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" + +#include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { + +bool DynamicPatriciaTrieGcEventListeners + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless + // children. + bool isUselessPtNode = !node->isTerminal(); + if (node->isTerminal() && mIsDecayingDict) { + const int newProbability = + ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(), + mHeaderPolicy); + int writingPos = node->getProbabilityFieldPos(); + // Update probability. + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( + mBuffer, newProbability, &writingPos)) { + return false; + } + if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { + isUselessPtNode = true; + } + } + if (mChildrenValue > 0) { + isUselessPtNode = false; + } else if (node->isTerminal()) { + // Remove children as all children are useless. + int writingPos = node->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) { + return false; + } + } + if (isUselessPtNode) { + // Current PtNode is no longer needed. Mark it as deleted. 
+ if (!mWritingHelper->markNodeAsDeleted(node)) { + return false; + } + } else { + mValueStack.back() += 1; + if (node->isTerminal()) { + mValidUnigramCount += 1; + } + } + return true; +} + +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + if (!node->isDeleted()) { + int pos = node->getBigramsPos(); + if (pos != NOT_A_DICT_POS) { + int bigramEntryCount = 0; + if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, + &bigramEntryCount)) { + return false; + } + mValidBigramEntryCount += bigramEntryCount; + } + } + return true; +} + +// Writes dummy PtNode array size when the head of PtNode array is read. +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + ::onDescend(const int ptNodeArrayPos) { + mValidPtNodeCount = 0; + int writingPos = mBufferToWrite->getTailPosition(); + mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type( + ptNodeArrayPos, writingPos)); + // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes. + // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count. + mPtNodeArraySizeFieldPos = writingPos; + return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( + mBufferToWrite, 0 /* arraySize */, &writingPos); +} + +// Write PtNode array terminal and actual PtNode array size. +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + ::onReadingPtNodeArrayTail() { + int writingPos = mBufferToWrite->getTailPosition(); + // Write PtNode array terminal. 
+ if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( + mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + // Write actual PtNode array size. + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( + mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) { + return false; + } + return true; +} + +// Write valid PtNode to buffer and memorize mapping from the old position to the new position. +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + if (node->isDeleted()) { + // Current PtNode is not written in new buffer because it has been deleted. + mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( + node->getHeadPos(), NOT_A_DICT_POS)); + return true; + } + int writingPos = mBufferToWrite->getTailPosition(); + mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( + node->getHeadPos(), writingPos)); + mValidPtNodeCount++; + // Writes current PtNode. + return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, + node->getParentPos(), nodeCodePoints, node->getCodePointCount(), + node->getProbability(), &writingPos); +} + +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + // Updates parent position. 
+ int parentPos = node->getParentPos(); + if (parentPos != NOT_A_DICT_POS) { + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = + mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); + if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) { + parentPos = it->second; + } + } + int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + // Write updated parent offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite, + parentPos, node->getHeadPos(), &writingPos)) { + return false; + } + + // Updates children position. + int childrenPos = node->getChildrenPos(); + if (childrenPos != NOT_A_DICT_POS) { + DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it = + mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); + if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) { + childrenPos = it->second; + } + } + writingPos = node->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite, + childrenPos, &writingPos)) { + return false; + } + + // Updates bigram target PtNode positions in the bigram list. 
+ int bigramsPos = node->getBigramsPos(); + if (bigramsPos != NOT_A_DICT_POS) { + int bigramEntryCount; + if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, + &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) { + return false; + } + mBigramCount += bigramEntryCount; + } + if (node->isTerminal()) { + mUnigramCount++; + } + + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index d8867754d..9755120b0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -14,32 +14,37 @@ * limitations under the License. */ -#ifndef LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H -#define LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "utils/hash_map_compat.h" namespace latinime { -class PtNodeParams; +class DictionaryHeaderStructurePolicy; -class DynamicPtGcEventListeners { +class DynamicPatriciaTrieGcEventListeners { public: // Updates all PtNodes that can be reached from the root. 
Checks if each PtNode is useless or // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC. // TODO: Concatenate non-terminal PtNodes. class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - : public DynamicPtReadingHelper::TraversingEventListener { + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - PtNodeWriter *const ptNodeWriter) - : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0), + const DictionaryHeaderStructurePolicy *const headerPolicy, + DynamicPatriciaTrieWritingHelper *const writingHelper, + BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) + : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer), + mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; @@ -61,7 +66,8 @@ class DynamicPtGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); int getValidUnigramCount() const { return mValidUnigramCount; @@ -71,7 +77,10 @@ class DynamicPtGcEventListeners { DISALLOW_IMPLICIT_CONSTRUCTORS( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); - PtNodeWriter *const mPtNodeWriter; + const DictionaryHeaderStructurePolicy *const mHeaderPolicy; + DynamicPatriciaTrieWritingHelper *const mWritingHelper; + BufferWithExtendableBuffer *const mBuffer; + const bool mIsDecayingDict; std::vector<int> mValueStack; int mChildrenValue; int mValidUnigramCount; @@ -80,10 +89,11 @@ class DynamicPtGcEventListeners { // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram // entries. 
class TraversePolicyToUpdateBigramProbability - : public DynamicPtReadingHelper::TraversingEventListener { + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - TraversePolicyToUpdateBigramProbability(PtNodeWriter *const ptNodeWriter) - : mPtNodeWriter(ptNodeWriter), mValidBigramEntryCount(0) {} + TraversePolicyToUpdateBigramProbability( + DynamicBigramListPolicy *const bigramPolicy) + : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {} bool onAscend() { return true; } @@ -91,7 +101,8 @@ class DynamicPtGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); int getValidBigramEntryCount() const { return mValidBigramEntryCount; @@ -100,17 +111,19 @@ class DynamicPtGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability); - PtNodeWriter *const mPtNodeWriter; + DynamicBigramListPolicy *const mBigramPolicy; int mValidBigramEntryCount; }; class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - : public DynamicPtReadingHelper::TraversingEventListener { + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToPlaceAndWriteValidPtNodesToBuffer( - PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite, - PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap) - : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite), + DynamicPatriciaTrieWritingHelper *const writingHelper, + BufferWithExtendableBuffer *const bufferToWrite, + DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + dictPositionRelocationMap) + : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite), mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0), mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {}; @@ -120,24 +133,31 @@ 
class DynamicPtGcEventListeners { bool onReadingPtNodeArrayTail(); - bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); - PtNodeWriter *const mPtNodeWriter; + DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBufferToWrite; - PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap; + DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + mDictPositionRelocationMap; int mValidPtNodeCount; int mPtNodeArraySizeFieldPos; }; class TraversePolicyToUpdateAllPositionFields - : public DynamicPtReadingHelper::TraversingEventListener { + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - TraversePolicyToUpdateAllPositionFields(PtNodeWriter *const ptNodeWriter, - const PtNodeWriter::DictPositionRelocationMap *const dictPositionRelocationMap) - : mPtNodeWriter(ptNodeWriter), + TraversePolicyToUpdateAllPositionFields( + DynamicPatriciaTrieWritingHelper *const writingHelper, + DynamicBigramListPolicy *const bigramPolicy, + BufferWithExtendableBuffer *const bufferToWrite, + const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + dictPositionRelocationMap) + : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), + mBufferToWrite(bufferToWrite), mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0), mBigramCount(0) {}; @@ -147,7 +167,8 @@ class DynamicPtGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); int getUnigramCount() const { return mUnigramCount; @@ -160,14 +181,17 @@ class DynamicPtGcEventListeners { private: 
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); - PtNodeWriter *const mPtNodeWriter; - const PtNodeWriter::DictPositionRelocationMap *const mDictPositionRelocationMap; + DynamicPatriciaTrieWritingHelper *const mWritingHelper; + DynamicBigramListPolicy *const mBigramPolicy; + BufferWithExtendableBuffer *const mBufferToWrite; + const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + mDictPositionRelocationMap; int mUnigramCount; int mBigramCount; }; private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtGcEventListeners); + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); }; } // namespace latinime -#endif /* LATINIME_DYNAMIC_PT_GC_EVENT_LISTENERS_H */ +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..2fa3111d3 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" + +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { + if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mBuffer->getTailPosition()); + ASSERT(false); + invalidatePtNodeInfo(); + return; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int pos = ptNodePos; + mHeadPos = ptNodePos; + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const int parentPosOffset = + DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, + &pos); + mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos); + if (outCodePoints != 0) { + mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); + } else { + mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( + dictBuf, mFlags, MAX_WORD_LENGTH, &pos); + } + if (isTerminal()) { + mProbabilityFieldPos = pos; + if (usesAdditionalBuffer) { + mProbabilityFieldPos += mBuffer->getOriginalBufferSize(); + } + mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, 
&pos); + } else { + mProbabilityFieldPos = NOT_A_DICT_POS; + mProbability = NOT_A_PROBABILITY; + } + mChildrenPosFieldPos = pos; + if (usesAdditionalBuffer) { + mChildrenPosFieldPos += mBuffer->getOriginalBufferSize(); + } + mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + dictBuf, &pos); + if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { + mChildrenPos += mBuffer->getOriginalBufferSize(); + } + if (mSiblingPos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + mBigramLinkedNodePos = mChildrenPos; + } else { + mBigramLinkedNodePos = NOT_A_DICT_POS; + } + } + if (usesAdditionalBuffer) { + pos += mBuffer->getOriginalBufferSize(); + } + if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { + mShortcutPos = pos; + mShortcutsPolicy->skipAllShortcuts(&pos); + } else { + mShortcutPos = NOT_A_DICT_POS; + } + if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { + mBigramPos = pos; + mBigramsPolicy->skipAllBigrams(&pos); + } else { + mBigramPos = NOT_A_DICT_POS; + } + // Update siblingPos if needed. + if (mSiblingPos == NOT_A_DICT_POS) { + // Sibling position is the tail position of current node. + mSiblingPos = pos; + } + // Read destination node if the read node is a moved node. + if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + // The destination position is stored at the same place as the parent position. 
+ fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount, + outCodePoints); + } +} + +void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() { + mHeadPos = NOT_A_DICT_POS; + mFlags = 0; + mParentPos = NOT_A_DICT_POS; + mCodePointCount = 0; + mProbabilityFieldPos = NOT_A_DICT_POS; + mProbability = NOT_A_PROBABILITY; + mChildrenPosFieldPos = NOT_A_DICT_POS; + mChildrenPos = NOT_A_DICT_POS; + mBigramLinkedNodePos = NOT_A_DICT_POS; + mShortcutPos = NOT_A_DICT_POS; + mBigramPos = NOT_A_DICT_POS; + mSiblingPos = NOT_A_DICT_POS; +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h new file mode 100644 index 000000000..3b36d425f --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; + +/* + * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved + * node and reads node attributes. + */ +class DynamicPatriciaTrieNodeReader { + public: + DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, + const DictionaryBigramsStructurePolicy *const bigramsPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) + : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), + mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), + mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mSiblingPos(NOT_A_DICT_POS) {} + + ~DynamicPatriciaTrieNodeReader() {} + + // Reads PtNode information from dictionary buffer and updates members with the information. 
+ AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) { + fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos , + 0 /* maxCodePointCount */, 0 /* outCodePoints */); + } + + AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { + mSiblingPos = NOT_A_DICT_POS; + mBigramLinkedNodePos = NOT_A_DICT_POS; + fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints); + } + + // HeadPos is different from NodePos when the current PtNode is a moved PtNode. + AK_FORCE_INLINE int getHeadPos() const { + return mHeadPos; + } + + // Flags + AK_FORCE_INLINE bool isDeleted() const { + return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags); + } + + AK_FORCE_INLINE bool hasChildren() const { + return mChildrenPos != NOT_A_DICT_POS; + } + + AK_FORCE_INLINE bool isTerminal() const { + return PatriciaTrieReadingUtils::isTerminal(mFlags); + } + + AK_FORCE_INLINE bool isBlacklisted() const { + return PatriciaTrieReadingUtils::isBlacklisted(mFlags); + } + + AK_FORCE_INLINE bool isNotAWord() const { + return PatriciaTrieReadingUtils::isNotAWord(mFlags); + } + + // Parent node position + AK_FORCE_INLINE int getParentPos() const { + return mParentPos; + } + + // Number of code points + AK_FORCE_INLINE uint8_t getCodePointCount() const { + return mCodePointCount; + } + + // Probability + AK_FORCE_INLINE int getProbabilityFieldPos() const { + return mProbabilityFieldPos; + } + + AK_FORCE_INLINE int getProbability() const { + return mProbability; + } + + // Children PtNode array position + AK_FORCE_INLINE int getChildrenPosFieldPos() const { + return mChildrenPosFieldPos; + } + + AK_FORCE_INLINE int getChildrenPos() const { + return mChildrenPos; + } + + // Bigram linked node position. 
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const { + return mBigramLinkedNodePos; + } + + // Shortcutlist position + AK_FORCE_INLINE int getShortcutPos() const { + return mShortcutPos; + } + + // Bigrams position + AK_FORCE_INLINE int getBigramsPos() const { + return mBigramPos; + } + + // Sibling node position + AK_FORCE_INLINE int getSiblingNodePos() const { + return mSiblingPos; + } + + private: + DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); + + const BufferWithExtendableBuffer *const mBuffer; + const DictionaryBigramsStructurePolicy *const mBigramsPolicy; + const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; + int mHeadPos; + DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; + int mParentPos; + uint8_t mCodePointCount; + int mProbabilityFieldPos; + int mProbability; + int mChildrenPosFieldPos; + int mChildrenPos; + int mBigramLinkedNodePos; + int mShortcutPos; + int mBigramPos; + int mSiblingPos; + + void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const int maxCodePointCount, int *const outCodePoints); + + void invalidatePtNodeInfo(); +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp new file mode 100644 index 000000000..495b146c2 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" + +#include <cstdio> +#include <cstring> +#include <ctime> + +#include "defines.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + +namespace latinime { + +// Note that these are corresponding definitions in Java side in BinaryDictionaryTests and +// BinaryDictionaryDecayingTests. 
+const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY = + "SET_NEEDS_TO_DECAY_FOR_TESTING"; +const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; +const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = + DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; + +void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const { + if (!dicNode->hasChildren()) { + return; + } + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos()); + const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); + while (!readingHelper.isEnd()) { + bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted(); + if (isTerminal && mHeaderPolicy.isDecayingDict()) { + // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose + // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a + // valid terminal DicNode. 
+ isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY) + != NOT_A_PROBABILITY; + } + childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), + nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal, + nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), + nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); + readingHelper.readNextSiblingNode(); + } +} + +int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const { + // This method traverses parent nodes from the terminal by following parent pointers; thus, + // node code points are stored in the buffer in the reverse order. + int reverseCodePoints[maxCodePointCount]; + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + // First, read the terminal node and get its probability. + readingHelper.initWithPtNodePos(ptNodePos); + if (!readingHelper.isValidTerminalNode()) { + // Node at the ptNodePos is not a valid terminal node. + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } + // Store terminal node probability. + *outUnigramProbability = readingHelper.getNodeReader()->getProbability(); + // Then, following parent node link to the dictionary root and fetch node code points. + while (!readingHelper.isEnd()) { + if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { + // The ptNodePos is not a valid terminal node position in the dictionary. + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } + // Store node code points to buffer in the reverse order. + readingHelper.fetchMergedNodeCodePointsInReverseOrder( + readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); + // Follow parent node toward the root node. 
+ readingHelper.readParentNode(); + } + if (readingHelper.isError()) { + // The node position or the dictionary is invalid. + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } + // Reverse the stored code points to output them. + const int codePointCount = readingHelper.getTotalCodePointCount(); + for (int i = 0; i < codePointCount; ++i) { + outCodePoints[i] = reverseCodePoints[codePointCount - i - 1]; + } + return codePointCount; +} + +int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const { + int searchCodePoints[length]; + for (int i = 0; i < length; ++i) { + searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; + } + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); + while (!readingHelper.isEnd()) { + const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); + if (readingHelper.getTotalCodePointCount() > length + || !readingHelper.isMatchedCodePoint(0 /* index */, + searchCodePoints[matchedCodePointCount])) { + // Current node has too many code points or its first code point is different from + // target code point. Skip this node and read the next sibling node. + readingHelper.readNextSiblingNode(); + continue; + } + // Check following merged node code points. + const int nodeCodePointCount = nodeReader->getCodePointCount(); + for (int j = 1; j < nodeCodePointCount; ++j) { + if (!readingHelper.isMatchedCodePoint( + j, searchCodePoints[matchedCodePointCount + j])) { + // Different code point is found. The given word is not included in the dictionary. + return NOT_A_DICT_POS; + } + } + // All characters are matched. 
+ if (length == readingHelper.getTotalCodePointCount()) { + // Terminal position is found. + return nodeReader->getHeadPos(); + } + if (!nodeReader->hasChildren()) { + return NOT_A_DICT_POS; + } + // Advance to the children nodes. + readingHelper.readChildNode(); + } + // If we already traversed the tree further than the word is long, there means + // there was no match (or we would have found it). + return NOT_A_DICT_POS; +} + +int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + if (mHeaderPolicy.isDecayingDict()) { + return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); + } else { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } + } +} + +int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_PROBABILITY; + } + DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { + return NOT_A_PROBABILITY; + } + return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); +} + +int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (nodeReader.isDeleted()) { + return NOT_A_DICT_POS; + } + return nodeReader.getShortcutPos(); 
+} + +int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (nodeReader.isDeleted()) { + return NOT_A_DICT_POS; + } + return nodeReader.getBigramsPos(); +} + +bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, + const int probability) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + return false; + } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + bool addedNewUnigram = false; + if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, + &addedNewUnigram)) { + if (addedNewUnigram) { + mUnigramCount++; + } + return true; + } else { + return false; + } +} + +bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1, const int probability) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } + const int word0Pos = 
getTerminalNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + bool addedNewBigram = false; + if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } +} + +bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } + const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } +} + +void DynamicPatriciaTriePolicy::flush(const char *const filePath) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: flush() is called for non-updatable dictionary."); + return; + } + DynamicPatriciaTrieWritingHelper 
writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); + writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); +} + +void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); + return; + } + const bool needsToDecay = mHeaderPolicy.isDecayingDict() + && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( + false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); + DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, + &mShortcutListPolicy, needsToDecay); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); + writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); + mNeedsToDecayForTesting = false; +} + +bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { + if (!mBuffer->isUpdatable()) { + AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); + return false; + } + if (mBufferWithExtendableBuffer.isNearSizeLimit()) { + // Additional buffer size is near the limit. + return true; + } else if (mHeaderPolicy.getExtendedRegionSize() + + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() + > MAX_DICT_EXTENDED_REGION_SIZE) { + // Total extended region size exceeds the limit. + return true; + } else if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS + && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) { + // Needs to reduce dictionary size. 
+ return true; + } else if (mHeaderPolicy.isDecayingDict()) { + return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( + mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy); + } + return false; +} + +void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, + const int maxResultLength) { + if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mUnigramCount); + } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mBigramCount); + } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", + mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT : + static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE)); + } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", + mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT : + static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE)); + } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) { + mNeedsToDecayForTesting = true; + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h new file mode 100644 index 000000000..be97ee1a5 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { + +class DicNode; +class DicNodeVector; + +class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { + public: + DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), + mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), + mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + mShortcutListPolicy(&mBufferWithExtendableBuffer), + mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, + mHeaderPolicy.isDecayingDict()), + mUnigramCount(mHeaderPolicy.getUnigramCount()), + mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} + + ~DynamicPatriciaTriePolicy() { + delete mBuffer; + } + + AK_FORCE_INLINE int getRootPosition() const { + return 0; + } + + void createAndGetAllChildNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const; + + int 
getCodePointsAndProbabilityAndReturnCodePointCount( + const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const; + + int getTerminalNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const; + + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + + int getShortcutPositionOfPtNode(const int ptNodePos) const; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; + + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return &mHeaderPolicy; + } + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return &mBigramListPolicy; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return &mShortcutListPolicy; + } + + bool addUnigramWord(const int *const word, const int length, const int probability); + + bool addBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1, const int probability); + + bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1); + + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC(const bool mindsBlockByGC) const; + + void getProperty(const char *const query, char *const outResult, + const int maxResultLength); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); + + static const char *const UNIGRAM_COUNT_QUERY; + static const char *const BIGRAM_COUNT_QUERY; + static const char *const MAX_UNIGRAM_COUNT_QUERY; + static const char *const MAX_BIGRAM_COUNT_QUERY; + static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY; + static const int MAX_DICT_EXTENDED_REGION_SIZE; + static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + + const MmappedBuffer *const 
mBuffer; + const HeaderPolicy mHeaderPolicy; + BufferWithExtendableBuffer mBufferWithExtendableBuffer; + DynamicShortcutListPolicy mShortcutListPolicy; + DynamicBigramListPolicy mBigramListPolicy; + int mUnigramCount; + int mBigramCount; + int mNeedsToDecayForTesting; +}; +} // namespace latinime +#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index b918e0765..f108c219f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -14,25 +14,22 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" -#include "utils/char_utils.h" namespace latinime { // To avoid infinite loop caused by invalid or malicious forward links. -const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; -const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; -const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; +const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; +const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; +const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; // Visits all PtNodes in post-order depth first manner. 
// For example, visits c -> b -> y -> x -> a for the following dictionary: // a _ b _ c // \ x _ y -bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( +bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( TraversingEventListener *const listener) { bool alreadyVisitedChildren = false; // Descend from the root to the root PtNode array. @@ -40,26 +37,22 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( return false; } while (!isEnd()) { - const PtNodeParams ptNodeParams(getPtNodeParams()); - if (!ptNodeParams.isValid()) { - break; - } if (!alreadyVisitedChildren) { - if (ptNodeParams.hasChildren()) { + if (mNodeReader.hasChildren()) { // Move to the first child. - if (!listener->onDescend(ptNodeParams.getChildrenPos())) { + if (!listener->onDescend(mNodeReader.getChildrenPos())) { return false; } pushReadingStateToStack(); - readChildNode(ptNodeParams); + readChildNode(); } else { alreadyVisitedChildren = true; } } else { - if (!listener->onVisitingPtNode(&ptNodeParams)) { + if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { return false; } - readNextSiblingNode(ptNodeParams); + readNextSiblingNode(); if (isEnd()) { // All PtNodes in current linked PtNode arrays have been visited. // Return to the parent. 
@@ -92,7 +85,7 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( // For example, visits a -> b -> x -> c -> y for the following dictionary: // a _ b _ c // \ x _ y -bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( +bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( TraversingEventListener *const listener) { bool alreadyVisitedAllPtNodesInArray = false; bool alreadyVisitedChildren = false; @@ -108,14 +101,10 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi } pushReadingStateToStack(); while (!isEnd()) { - const PtNodeParams ptNodeParams(getPtNodeParams()); - if (!ptNodeParams.isValid()) { - break; - } if (alreadyVisitedAllPtNodesInArray) { if (alreadyVisitedChildren) { // Move to next sibling PtNode's children. - readNextSiblingNode(ptNodeParams); + readNextSiblingNode(); if (isEnd()) { // Return to the parent PTNode. if (!listener->onAscend()) { @@ -131,13 +120,13 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi alreadyVisitedChildren = false; } } else { - if (ptNodeParams.hasChildren()) { + if (mNodeReader.hasChildren()) { // Move to the first child. - if (!listener->onDescend(ptNodeParams.getChildrenPos())) { + if (!listener->onDescend(mNodeReader.getChildrenPos())) { return false; } pushReadingStateToStack(); - readChildNode(ptNodeParams); + readChildNode(); // Push state to return the head of PtNode array. 
pushReadingStateToStack(); alreadyVisitedAllPtNodesInArray = false; @@ -147,10 +136,10 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi } } } else { - if (!listener->onVisitingPtNode(&ptNodeParams)) { + if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { return false; } - readNextSiblingNode(ptNodeParams); + readNextSiblingNode(); if (isEnd()) { if (!listener->onReadingPtNodeArrayTail()) { return false; @@ -169,95 +158,9 @@ bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFi return !isError(); } -int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount( - const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) { - // This method traverses parent nodes from the terminal by following parent pointers; thus, - // node code points are stored in the buffer in the reverse order. - int reverseCodePoints[maxCodePointCount]; - const PtNodeParams terminalPtNodeParams(getPtNodeParams()); - // First, read the terminal node and get its probability. - if (!isValidTerminalNode(terminalPtNodeParams)) { - // Node at the ptNodePos is not a valid terminal node. - *outUnigramProbability = NOT_A_PROBABILITY; - return 0; - } - // Store terminal node probability. - *outUnigramProbability = terminalPtNodeParams.getProbability(); - // Then, following parent node link to the dictionary root and fetch node code points. - int totalCodePointCount = 0; - while (!isEnd()) { - const PtNodeParams ptNodeParams(getPtNodeParams()); - totalCodePointCount = getTotalCodePointCount(ptNodeParams); - if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) { - // The ptNodePos is not a valid terminal node position in the dictionary. - *outUnigramProbability = NOT_A_PROBABILITY; - return 0; - } - // Store node code points to buffer in the reverse order. 
- fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(), - reverseCodePoints); - // Follow parent node toward the root node. - readParentNode(ptNodeParams); - } - if (isError()) { - // The node position or the dictionary is invalid. - *outUnigramProbability = NOT_A_PROBABILITY; - return 0; - } - // Reverse the stored code points to output them. - for (int i = 0; i < totalCodePointCount; ++i) { - outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1]; - } - return totalCodePointCount; -} - -int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord, - const int length, const bool forceLowerCaseSearch) { - int searchCodePoints[length]; - for (int i = 0; i < length; ++i) { - searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; - } - while (!isEnd()) { - const PtNodeParams ptNodeParams(getPtNodeParams()); - const int matchedCodePointCount = getPrevTotalCodePointCount(); - if (getTotalCodePointCount(ptNodeParams) > length - || !isMatchedCodePoint(ptNodeParams, 0 /* index */, - searchCodePoints[matchedCodePointCount])) { - // Current node has too many code points or its first code point is different from - // target code point. Skip this node and read the next sibling node. - readNextSiblingNode(ptNodeParams); - continue; - } - // Check following merged node code points. - const int nodeCodePointCount = ptNodeParams.getCodePointCount(); - for (int j = 1; j < nodeCodePointCount; ++j) { - if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) { - // Different code point is found. The given word is not included in the dictionary. - return NOT_A_DICT_POS; - } - } - // All characters are matched. - if (length == getTotalCodePointCount(ptNodeParams)) { - if (!ptNodeParams.isTerminal()) { - return NOT_A_DICT_POS; - } - // Terminal position is found. 
- return ptNodeParams.getHeadPos(); - } - if (!ptNodeParams.hasChildren()) { - return NOT_A_DICT_POS; - } - // Advance to the children nodes. - readChildNode(ptNodeParams); - } - // If we already traversed the tree further than the word is long, there means - // there was no match (or we would have found it). - return NOT_A_DICT_POS; -} - // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. -void DynamicPtReadingHelper::nextPtNodeArray() { +void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { // Reading invalid position because of a bug or a broken dictionary. AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", @@ -267,48 +170,42 @@ void DynamicPtReadingHelper::nextPtNodeArray() { mReadingState.mPos = NOT_A_DICT_POS; return; } - mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; + mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } - mReadingState.mRemainingPtNodeCountInThisArray = - PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, - &mReadingState.mPos); + mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + dictBuf, &mReadingState.mPos); if (usesAdditionalBuffer) { mReadingState.mPos += mBuffer->getOriginalBufferSize(); } // Count up nodes and node arrays to avoid infinite loop. 
- mReadingState.mTotalPtNodeIndexInThisArrayChain += - mReadingState.mRemainingPtNodeCountInThisArray; - mReadingState.mPtNodeArrayIndexInThisArrayChain++; - if (mReadingState.mRemainingPtNodeCountInThisArray < 0 - || mReadingState.mTotalPtNodeIndexInThisArrayChain - > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP - || mReadingState.mPtNodeArrayIndexInThisArrayChain - > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { + mReadingState.mTotalNodeCount += mReadingState.mNodeCount; + mReadingState.mNodeArrayCount++; + if (mReadingState.mNodeCount < 0 + || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP + || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", - mReadingState.mRemainingPtNodeCountInThisArray, - mReadingState.mTotalPtNodeIndexInThisArrayChain, - MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, - mReadingState.mPtNodeArrayIndexInThisArrayChain, - MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); + mReadingState.mNodeCount, mReadingState.mTotalNodeCount, + MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount, + MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } - if (mReadingState.mRemainingPtNodeCountInThisArray == 0) { + if (mReadingState.mNodeCount == 0) { // Empty node array. Try following forward link. followForwardLink(); } } // Follow the forward link and read the next node array if exists. -void DynamicPtReadingHelper::followForwardLink() { +void DynamicPatriciaTrieReadingHelper::followForwardLink() { if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { // Reading invalid position because of bug or broken dictionary. 
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", @@ -324,12 +221,12 @@ void DynamicPtReadingHelper::followForwardLink() { mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } const int forwardLinkPosition = - DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos); + DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos); if (usesAdditionalBuffer) { mReadingState.mPos += mBuffer->getOriginalBufferSize(); } mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos; - if (DynamicPtReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { + if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { // Follow the forward link. mReadingState.mPos += forwardLinkPosition; nextPtNodeArray(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index a69490943..a71c06971 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -14,15 +14,16 @@ * limitations under the License. 
*/ -#ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H -#define LATINIME_DYNAMIC_PT_READING_HELPER_H +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #include <cstddef> #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { @@ -34,7 +35,7 @@ class DictionaryShortcutsStructurePolicy; * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop. */ -class DynamicPtReadingHelper { +class DynamicPatriciaTrieReadingHelper { public: class TraversingEventListener { public: @@ -50,7 +51,8 @@ class DynamicPtReadingHelper { virtual bool onReadingPtNodeArrayTail() = 0; // Returns whether the event handling was succeeded or not. 
- virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0; + virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) = 0; protected: TraversingEventListener() {}; @@ -59,12 +61,13 @@ class DynamicPtReadingHelper { DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); }; - DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer, - const PtNodeReader *const ptNodeReader) + DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, + const DictionaryBigramsStructurePolicy *const bigramsPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mIsError(false), mReadingState(), mBuffer(buffer), - mPtNodeReader(ptNodeReader), mReadingStateStack() {} + mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {} - ~DynamicPtReadingHelper() {} + ~DynamicPatriciaTrieReadingHelper() {} AK_FORCE_INLINE bool isError() const { return mIsError; @@ -81,12 +84,15 @@ class DynamicPtReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodeArrayPos; - mReadingState.mTotalCodePointCountSinceInitialization = 0; - mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; - mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; + mReadingState.mPrevTotalCodePointCount = 0; + mReadingState.mTotalNodeCount = 0; + mReadingState.mNodeArrayCount = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextPtNodeArray(); + if (!isEnd()) { + fetchPtNodeInfo(); + } } } @@ -97,88 +103,94 @@ class DynamicPtReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodePos; - mReadingState.mRemainingPtNodeCountInThisArray = 1; - mReadingState.mTotalCodePointCountSinceInitialization = 0; - mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; - mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; + mReadingState.mNodeCount = 1; + mReadingState.mPrevTotalCodePointCount = 0; + mReadingState.mTotalNodeCount = 1; + 
mReadingState.mNodeArrayCount = 1; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); + fetchPtNodeInfo(); } } - AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const { - if (isEnd()) { - return PtNodeParams(); - } - return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos); + AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const { + return &mNodeReader; } - AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const { - return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal(); + AK_FORCE_INLINE bool isValidTerminalNode() const { + return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal(); } - AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index, - const int codePoint) const { - return ptNodeParams.getCodePoints()[index] == codePoint; + AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const { + return mMergedNodeCodePoints[index] == codePoint; } // Return code point count exclude the last read node's code points. AK_FORCE_INLINE int getPrevTotalCodePointCount() const { - return mReadingState.mTotalCodePointCountSinceInitialization; + return mReadingState.mPrevTotalCodePointCount; } // Return code point count include the last read node's code points. 
- AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const { - return mReadingState.mTotalCodePointCountSinceInitialization - + ptNodeParams.getCodePointCount(); + AK_FORCE_INLINE int getTotalCodePointCount() const { + return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); } - AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams, + AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( const int index, int *const outCodePoints) const { - const int nodeCodePointCount = ptNodeParams.getCodePointCount(); - const int *const nodeCodePoints = ptNodeParams.getCodePoints(); + const int nodeCodePointCount = mNodeReader.getCodePointCount(); for (int i = 0; i < nodeCodePointCount; ++i) { - outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i]; + outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i]; } } - AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) { - mReadingState.mRemainingPtNodeCountInThisArray -= 1; - mReadingState.mPos = ptNodeParams.getSiblingNodePos(); - if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { + AK_FORCE_INLINE const int *getMergedNodeCodePoints() const { + return mMergedNodeCodePoints; + } + + AK_FORCE_INLINE void readNextSiblingNode() { + mReadingState.mNodeCount -= 1; + mReadingState.mPos = mNodeReader.getSiblingNodePos(); + if (mReadingState.mNodeCount <= 0) { // All nodes in the current node array have been read. followForwardLink(); + if (!isEnd()) { + fetchPtNodeInfo(); + } + } else { + fetchPtNodeInfo(); } } // Read the first child node of the current node. 
- AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) { - if (ptNodeParams.hasChildren()) { - mReadingState.mTotalCodePointCountSinceInitialization += - ptNodeParams.getCodePointCount(); - mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; - mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; - mReadingState.mPos = ptNodeParams.getChildrenPos(); + AK_FORCE_INLINE void readChildNode() { + if (mNodeReader.hasChildren()) { + mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); + mReadingState.mTotalNodeCount = 0; + mReadingState.mNodeArrayCount = 0; + mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. nextPtNodeArray(); + if (!isEnd()) { + fetchPtNodeInfo(); + } } else { mReadingState.mPos = NOT_A_DICT_POS; } } // Read the parent node of the current node. - AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) { - if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) { - mReadingState.mTotalCodePointCountSinceInitialization += - ptNodeParams.getCodePointCount(); - mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; - mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; - mReadingState.mRemainingPtNodeCountInThisArray = 1; - mReadingState.mPos = ptNodeParams.getParentPos(); + AK_FORCE_INLINE void readParentNode() { + if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { + mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); + mReadingState.mTotalNodeCount = 1; + mReadingState.mNodeArrayCount = 1; + mReadingState.mNodeCount = 1; + mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; + fetchPtNodeInfo(); } else { mReadingState.mPos = NOT_A_DICT_POS; } @@ -189,7 +201,13 @@ class DynamicPtReadingHelper { } AK_FORCE_INLINE int 
getPosOfLastPtNodeArrayHead() const { - return mReadingState.mPosOfThisPtNodeArrayHead; + return mReadingState.mPosOfLastPtNodeArrayHead; + } + + AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { + if (!isEnd()) { + fetchPtNodeInfo(); + } } bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); @@ -197,54 +215,53 @@ class DynamicPtReadingHelper { bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( TraversingEventListener *const listener); - int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount, - int *const outCodePoints, int *const outUnigramProbability); - - int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, - const bool forceLowerCaseSearch); - private: - DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper); + DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); - // This class encapsulates the reading state of a position in the dictionary. It points at a - // specific PtNode in the dictionary. - class PtNodeReadingState { + class ReadingState { public: // Note that copy constructor and assignment operator are used for this class to use // std::vector. - PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0), - mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0), - mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), - mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {} + ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), + mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), + mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {} int mPos; - // Remaining node count in the current array. - int mRemainingPtNodeCountInThisArray; - int mTotalCodePointCountSinceInitialization; - // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links. 
- int mTotalPtNodeIndexInThisArrayChain; - // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty - // PtNode arrays. - int mPtNodeArrayIndexInThisArrayChain; + // Node count of a node array. + int mNodeCount; + int mPrevTotalCodePointCount; + int mTotalNodeCount; + int mNodeArrayCount; int mPosOfLastForwardLinkField; - int mPosOfThisPtNodeArrayHead; + int mPosOfLastPtNodeArrayHead; }; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; + static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; // TODO: Introduce error code to track what caused the error. bool mIsError; - PtNodeReadingState mReadingState; + ReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; - const PtNodeReader *const mPtNodeReader; - std::vector<PtNodeReadingState> mReadingStateStack; + DynamicPatriciaTrieNodeReader mNodeReader; + int mMergedNodeCodePoints[MAX_WORD_LENGTH]; + std::vector<ReadingState> mReadingStateStack; void nextPtNodeArray(); void followForwardLink(); + AK_FORCE_INLINE void fetchPtNodeInfo() { + mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos, + MAX_WORD_LENGTH, mMergedNodeCodePoints); + if (mNodeReader.getCodePointCount() <= 0) { + // Empty node is not allowed. + mIsError = true; + mReadingState.mPos = NOT_A_DICT_POS; + } + } + AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { AKLOGI("Reading state stack overflow. 
Max size: %zd", MAX_READING_STATE_STACK_SIZE); @@ -262,8 +279,11 @@ class DynamicPtReadingHelper { } else { mReadingState = mReadingStateStack.back(); mReadingStateStack.pop_back(); + if (!isEnd()) { + fetchPtNodeInfo(); + } } } }; } // namespace latinime -#endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */ +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index 3586b50ab..d68446db6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -14,38 +14,38 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { -const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::MASK_MOVED = 0xC0; -const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_NOT_MOVED = 0xC0; -const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_MOVED = 0x40; -const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_IS_DELETED = 0x80; -const DynamicPtReadingUtils::NodeFlags DynamicPtReadingUtils::FLAG_WILL_BECOME_NON_TERMINAL = 0x00; +typedef DynamicPatriciaTrieReadingUtils DptReadingUtils; + +const DptReadingUtils::NodeFlags DptReadingUtils::MASK_MOVED = 0xC0; +const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0; +const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40; +const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80; // TODO: Make DICT_OFFSET_ZERO_OFFSET = 0. 
// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum // value of offsets, which is 0x7FFFFF is used to represent 0 offset. -const int DynamicPtReadingUtils::DICT_OFFSET_INVALID = 0; -const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF; +const int DptReadingUtils::DICT_OFFSET_INVALID = 0; +const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF; -/* static */ int DynamicPtReadingUtils::getForwardLinkPosition(const uint8_t *const buffer, +/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer, const int pos) { int linkAddressPos = pos; return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); } -/* static */ int DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( +/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( const uint8_t *const buffer, int *const pos) { return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); } -/* static */ int DynamicPtReadingUtils::getParentPtNodePos(const int parentOffset, - const int ptNodePos) { +/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) { if (parentOffset == DICT_OFFSET_INVALID) { return NOT_A_DICT_POS; } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) { @@ -55,7 +55,7 @@ const int DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF; } } -/* static */ int DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition( +/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const int base = *pos; const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index 89ae12c0b..67c3cc57e 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LATINIME_DYNAMIC_PT_READING_UTILS_H -#define LATINIME_DYNAMIC_PT_READING_UTILS_H +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H #include <stdint.h> @@ -23,7 +23,7 @@ namespace latinime { -class DynamicPtReadingUtils { +class DynamicPatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; @@ -54,30 +54,22 @@ class DynamicPtReadingUtils { return FLAG_IS_DELETED == (MASK_MOVED & flags); } - static AK_FORCE_INLINE bool willBecomeNonTerminal(const NodeFlags flags) { - return FLAG_WILL_BECOME_NON_TERMINAL == (MASK_MOVED & flags); - } - static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags, - const bool isMoved, const bool isDeleted, const bool willBecomeNonTerminal) { + const bool isMoved, const bool isDeleted) { NodeFlags flags = originalFlags; - flags = willBecomeNonTerminal ? - ((flags & (~MASK_MOVED)) | FLAG_WILL_BECOME_NON_TERMINAL) : flags; flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags; flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags; - flags = (!isMoved && !isDeleted && !willBecomeNonTerminal) ? - ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags; + flags = (!isMoved && !isDeleted) ? 
((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags; return flags; } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtReadingUtils); + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieReadingUtils); static const NodeFlags MASK_MOVED; static const NodeFlags FLAG_IS_NOT_MOVED; static const NodeFlags FLAG_IS_MOVED; static const NodeFlags FLAG_IS_DELETED; - static const NodeFlags FLAG_WILL_BECOME_NON_TERMINAL; }; } // namespace latinime -#endif /* LATINIME_DYNAMIC_PT_READING_UTILS_H */ +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp new file mode 100644 index 000000000..052558bfc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -0,0 +1,558 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" + +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" +#include "utils/hash_map_compat.h" + +namespace latinime { + +const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; +// TODO: Make MAX_DICTIONARY_SIZE 8MB. +const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; + +bool DynamicPatriciaTrieWritingHelper::addUnigramWord( + DynamicPatriciaTrieReadingHelper *const readingHelper, + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram) { + int parentPos = NOT_A_DICT_POS; + while (!readingHelper->isEnd()) { + const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); + if (!readingHelper->isMatchedCodePoint(0 /* index */, + wordCodePoints[matchedCodePointCount])) { + // The first code point is different from target code point. Skip this node and read + // the next sibling node. + readingHelper->readNextSiblingNode(); + continue; + } + // Check following merged node code points. 
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader(); + const int nodeCodePointCount = nodeReader->getCodePointCount(); + for (int j = 1; j < nodeCodePointCount; ++j) { + const int nextIndex = matchedCodePointCount + j; + if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, + wordCodePoints[matchedCodePointCount + j])) { + *outAddedNewUnigram = true; + return reallocatePtNodeAndAddNewPtNodes(nodeReader, + readingHelper->getMergedNodeCodePoints(), j, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, + probability), + wordCodePoints + matchedCodePointCount, + codePointCount - matchedCodePointCount); + } + } + // All characters are matched. + if (codePointCount == readingHelper->getTotalCodePointCount()) { + return setPtNodeProbability(nodeReader, probability, + readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram); + } + if (!nodeReader->hasChildren()) { + *outAddedNewUnigram = true; + return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), + wordCodePoints + readingHelper->getTotalCodePointCount(), + codePointCount - readingHelper->getTotalCodePointCount()); + } + // Advance to the children nodes. + parentPos = nodeReader->getHeadPos(); + readingHelper->readChildNode(); + } + if (readingHelper->isError()) { + // The dictionary is invalid. 
+ return false; + } + int pos = readingHelper->getPosOfLastForwardLinkField(); + *outAddedNewUnigram = true; + return createAndInsertNodeIntoPtNodeArray(parentPos, + wordCodePoints + readingHelper->getPrevTotalCodePointCount(), + codePointCount - readingHelper->getPrevTotalCodePointCount(), + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); +} + +bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, + const int probability, bool *const outAddedNewBigram) { + int mMergedNodeCodePoints[MAX_WORD_LENGTH]; + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, + mMergedNodeCodePoints); + // Move node to add bigram entry. + const int newNodePos = mBuffer->getTailPosition(); + if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { + return false; + } + int writingPos = newNodePos; + // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(), + mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), + &writingPos)) { + return false; + } + nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); + if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { + // Insert a new bigram entry into the existing bigram list. + int bigramListPos = nodeReader.getBigramsPos(); + return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, + outAddedNewBigram); + } else { + // The PtNode doesn't have a bigram list. + *outAddedNewBigram = true; + // First, Write a bigram entry at the tail position of the PtNode. + if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { + return false; + } + // Then, Mark as the PtNode having bigram list in the flags. 
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), + nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, + nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + writingPos = newNodePos; + // Write updated flags into the moved PtNode's flags field. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); + } +} + +// Remove a bigram relation from word0Pos to word1Pos. +bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos); + if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { + return false; + } + return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); +} + +void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, + const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + + mBuffer->getUsedAdditionalBufferSize(); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, + false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) { + return; + } + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); +} + +void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, + const char *const fileName, const HeaderPolicy *const headerPolicy) { + BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, + MAX_DICTIONARY_SIZE); + int unigramCount = 0; + int 
bigramCount = 0; + if (mNeedsToDecay) { + ForgettingCurveUtils::sTimeKeeper.setCurrentTime(); + } + if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) { + return; + } + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, + mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { + return; + } + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); +} + +bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( + const DynamicPatriciaTrieNodeReader *const nodeToUpdate) { + int pos = nodeToUpdate->getHeadPos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, + true /* isDeleted */); + int writingPos = nodeToUpdate->getHeadPos(); + // Update flags. 
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); +} + +bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( + const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, + const int bigramLinkedNodePos) { + int pos = originalNode->getHeadPos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, + false /* isDeleted */); + int writingPos = originalNode->getHeadPos(); + // Update flags. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos)) { + return false; + } + // Update moved position, which is stored in the parent offset field. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { + return false; + } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } + if (originalNode->hasChildren()) { + // Update children's parent position. 
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); + const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); + readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); + while (!readingHelper.isEnd()) { + int parentOffsetFieldPos = nodeReader->getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(), + &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. + return false; + } + readingHelper.readNextSiblingNode(); + } + } + return true; +} + +// Write new PtNode at writingPos. +bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer( + BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted, + const bool isNotAWord, const int parentPos, const int *const codePoints, + const int codePointCount, const int probability, const int childrenPos, + const int originalBigramListPos, const int originalShortcutListPos, + int *const writingPos) { + const int nodePos = *writingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, + 0 /* nodeFlags */, writingPos)) { + return false; + } + // Calculate a parent offset and write the offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite, + parentPos, nodePos, writingPos)) { + return false; + } + // Write code points + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite, + codePoints, codePointCount, writingPos)) { + return false; + } + // Write probability when the probability is a valid probability, which means this node is + // terminal. 
+ if (probability != NOT_A_PROBABILITY) { + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite, + probability, writingPos)) { + return false; + } + } + // Write children position + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite, + childrenPos, writingPos)) { + return false; + } + // Copy shortcut list when the originalShortcutListPos is valid dictionary position. + if (originalShortcutListPos != NOT_A_DICT_POS) { + int fromPos = originalShortcutListPos; + if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos, + writingPos)) { + return false; + } + } + // Copy bigram list when the originalBigramListPos is valid dictionary position. + int bigramCount = 0; + if (originalBigramListPos != NOT_A_DICT_POS) { + int fromPos = originalBigramListPos; + if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) { + return false; + } + } + // Create node flags and write them. 
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, + probability != NOT_A_PROBABILITY /* isTerminal */, + originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */, + bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags, + &flagsFieldPos)) { + return false; + } + return true; +} + +bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer( + BufferWithExtendableBuffer *const bufferToWrite, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos) { + return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */, + false /* isNotAWord */, parentPos, codePoints, codePointCount, probability, + NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, + NOT_A_DICT_POS /* originalShortcutPos */, writingPos); +} + +bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( + BufferWithExtendableBuffer *const bufferToWrite, + const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos) { + return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(), + originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, + originalNode->getChildrenPos(), originalNode->getBigramsPos(), + originalNode->getShortcutPos(), writingPos); +} + +bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, + const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, + int *const forwardLinkFieldPos) { + const int newPtNodeArrayPos = mBuffer->getTailPosition(); + if 
(!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + newPtNodeArrayPos, forwardLinkFieldPos)) { + return false; + } + return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, + probability); +} + +bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( + const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, + const int *const codePoints, bool *const outAddedNewUnigram) { + if (originalPtNode->isTerminal()) { + // Overwrites the probability. + *outAddedNewUnigram = false; + const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), + probability); + int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + probabilityToWrite, &probabilityFieldPos)) { + return false; + } + } else { + // Make the node terminal and write the probability. + *outAddedNewUnigram = true; + int movedPos = mBuffer->getTailPosition(); + if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { + return false; + } + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, + originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), + &movedPos)) { + return false; + } + } + return true; +} + +bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( + const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, + const int *const codePoints, const int codePointCount) { + const int newPtNodeArrayPos = mBuffer->getTailPosition(); + int childrenPosFieldPos = parentNode->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + newPtNodeArrayPos, &childrenPosFieldPos)) { + return false; + } + return 
createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, + codePointCount, probability); +} + +bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( + const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, + const int probability) { + int writingPos = mBuffer->getTailPosition(); + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, + 1 /* arraySize */, &writingPos)) { + return false; + } + if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount, + probability, &writingPos)) { + return false; + } + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + return true; +} + +// Returns whether the dictionary updating was succeeded or not. +bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( + const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, + const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const int probabilityOfNewPtNode, const int *const newNodeCodePoints, + const int newNodeCodePointCount) { + // When addsExtraChild is true, split the reallocating PtNode and add new child. + // Reallocating PtNode: abcde, newNode: abcxy. + // abc (1st, not terminal) __ de (2nd) + // \_ xy (extra child, terminal) + // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. + // Reallocating PtNode: abcde, newNode: abc. + // abc (1st, terminal) __ de (2nd) + const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; + const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); + int writingPos = firstPartOfReallocatedPtNodePos; + // Write the 1st part of the reallocating node. The children position will be updated later + // with actual children position. + const int newProbability = addsExtraChild ? 
NOT_A_PROBABILITY : probabilityOfNewPtNode; + if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(), + reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, + &writingPos)) { + return false; + } + const int actualChildrenPos = writingPos; + // Create new children PtNode array. + const size_t newPtNodeCount = addsExtraChild ? 2 : 1; + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, + newPtNodeCount, &writingPos)) { + return false; + } + // Write the 2nd part of the reallocating node. + const int secondPartOfReallocatedPtNodePos = writingPos; + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode, + firstPartOfReallocatedPtNodePos, + reallocatingPtNodeCodePoints + overlappingCodePointCount, + reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, + reallocatingPtNode->getProbability(), &writingPos)) { + return false; + } + if (addsExtraChild) { + if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos, + newNodeCodePoints + overlappingCodePointCount, + newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, + &writingPos)) { + return false; + } + } + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + // Update original reallocatingPtNode as moved. + if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, + secondPartOfReallocatedPtNodePos)) { + return false; + } + // Load node info. Information of the 1st part will be fetched. + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos); + // Update children position. 
+ int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + actualChildrenPos, &childrenPosFieldPos)) { + return false; + } + return true; +} + +bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, + const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, + int *const outUnigramCount, int *const outBigramCount) { + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( + headerPolicy, this, mBuffer, mNeedsToDecay); + if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { + return false; + } + if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more unigrams. + } + + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + traversePolicyToUpdateBigramProbability(mBigramPolicy); + if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateBigramProbability)) { + return false; + } + if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() + > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more bigrams. + } + + // Mapping from positions in mBuffer to positions in bufferToWrite. 
+ DictPositionRelocationMap dictPositionRelocationMap; + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, + &dictPositionRelocationMap); + if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { + return false; + } + + // Create policy instance for the GCed dictionary. + DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); + DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, + mNeedsToDecay); + // Create reading helper for the GCed dictionary. + DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, + &newDictShortcutPolicy); + newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, + &dictPositionRelocationMap); + if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToUpdateAllPositionFields)) { + return false; + } + *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); + *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount(); + return true; +} + +int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, + const int newProbability) { + if (mNeedsToDecay) { + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); + } else { + return newProbability; + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h new file mode 100644 index 
000000000..ca8664729 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H + +#include <stdint.h> + +#include "defines.h" +#include "utils/hash_map_compat.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DynamicBigramListPolicy; +class DynamicPatriciaTrieNodeReader; +class DynamicPatriciaTrieReadingHelper; +class DynamicShortcutListPolicy; +class HeaderPolicy; + +class DynamicPatriciaTrieWritingHelper { + public: + typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap; + typedef hash_map_compat<int, int> PtNodePositionRelocationMap; + struct DictPositionRelocationMap { + public: + DictPositionRelocationMap() + : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {} + + PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap; + PtNodePositionRelocationMap mPtNodePositionRelocationMap; + + private: + DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); + }; + + static const size_t MAX_DICTIONARY_SIZE; + + DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, + DynamicBigramListPolicy *const bigramPolicy, + DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) + : 
mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + mNeedsToDecay(needsToDecay) {} + + ~DynamicPatriciaTrieWritingHelper() {} + + // Add a word to the dictionary. If the word already exists, update the probability. + bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram); + + // Add a bigram relation from word0Pos to word1Pos. + bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, + bool *const outAddedNewBigram); + + // Remove a bigram relation from word0Pos to word1Pos. + bool removeBigramWords(const int word0Pos, const int word1Pos); + + void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, + const int unigramCount, const int bigramCount); + + void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, + const HeaderPolicy *const headerPolicy); + + // CAVEAT: This method must be called only from inner classes of + // DynamicPatriciaTrieGcEventListeners. + bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); + + // CAVEAT: This method must be called only from this class or inner classes of + // DynamicPatriciaTrieGcEventListeners. 
+ bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, + const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); + + static const int CHILDREN_POSITION_FIELD_SIZE; + + BufferWithExtendableBuffer *const mBuffer; + DynamicBigramListPolicy *const mBigramPolicy; + DynamicShortcutListPolicy *const mShortcutPolicy; + const bool mNeedsToDecay; + + bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, + const int movedPos, const int bigramLinkedNodePos); + + bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, + const bool isBlacklisted, const bool isNotAWord, + const int parentPos, const int *const codePoints, const int codePointCount, + const int probability, const int childrenPos, const int originalBigramListPos, + const int originalShortcutListPos, int *const writingPos); + + bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite, + const int parentPos, const int *const codePoints, const int codePointCount, + const int probability, int *const writingPos); + + bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, + const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); + + bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, + const int probability, const int *const codePoints, bool *const outAddedNewUnigram); + + bool createChildrenPtNodeArrayAndAChildPtNode( + const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, + const int *const codePoints, const int codePointCount); + + bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, + const int nodeCodePointCount, const int 
probability); + + bool reallocatePtNodeAndAddNewPtNodes( + const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, + const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const int probabilityOfNewPtNode, const int *const newNodeCodePoints, + const int newNodeCodePointCount); + + bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, + BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, + int *const outBigramCount); + + int getUpdatedProbability(const int originalProbability, const int newProbability); +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index ebbdc2ea2..30ff10cd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" #include <cstddef> #include <cstdlib> @@ -24,18 +24,19 @@ namespace latinime { -const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F; -const size_t DynamicPtWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF; -const int DynamicPtWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1; -const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2; -const int DynamicPtWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000; -const int DynamicPtWritingUtils::DICT_OFFSET_FIELD_SIZE = 3; -const int DynamicPtWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF; -const int DynamicPtWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF; -const int DynamicPtWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000; -const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1; +const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD = 0x7F; +const size_t DynamicPatriciaTrieWritingUtils::MAX_PTNODE_ARRAY_SIZE = 0x7FFF; +const int DynamicPatriciaTrieWritingUtils::SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE = 1; +const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE = 2; +const int DynamicPatriciaTrieWritingUtils::LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG = 0x8000; +const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3; +const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF; +const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF; +const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000; +const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1; +const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; -/* static */ bool DynamicPtWritingUtils::writeEmptyDictionary( +/* static */ bool 
DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( BufferWithExtendableBuffer *const buffer, const int rootPos) { int writingPos = rootPos; if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) { @@ -45,13 +46,13 @@ const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1; &writingPos); } -/* static */ bool DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition( +/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos) { return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos); } -/* static */ bool DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition( +/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const size_t arraySize, int *const arraySizeFieldPos) { // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to @@ -73,20 +74,20 @@ const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1; } } -/* static */ bool DynamicPtWritingUtils::writeFlagsAndAdvancePosition( +/* static */ bool DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition( BufferWithExtendableBuffer *const buffer, - const DynamicPtReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) { + const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos) { return buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE, nodeFlagsFieldPos); } // Note that parentOffset is offset from node's head position. 
-/* static */ bool DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition( +/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos, int *const parentPosFieldPos) { return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos); } -/* static */ bool DynamicPtWritingUtils::writeCodePointsAndAdvancePosition( +/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int *const codePoints, const int codePointCount, int *const codePointFieldPos) { if (codePointCount <= 0) { @@ -100,20 +101,34 @@ const int DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE = 1; hasMultipleCodePoints, codePointFieldPos); } -/* static */ bool DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition( +/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( + BufferWithExtendableBuffer *const buffer, const int probability, + int *const probabilityFieldPos) { + if (probability < 0 || probability > MAX_PROBABILITY) { + AKLOGI("probability cannot be written because the probability is invalid: %d", + probability); + ASSERT(false); + return false; + } + return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE, + probabilityFieldPos); +} + +/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int childrenPosition, int *const childrenPositionFieldPos) { return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos), childrenPositionFieldPos); } -/* static */ bool DynamicPtWritingUtils::writeDictOffset(BufferWithExtendableBuffer *const buffer, - const int targetPos, const int basePos, int *const offsetFieldPos) { +/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset( + BufferWithExtendableBuffer *const buffer, const int targetPos, 
const int basePos, + int *const offsetFieldPos) { int offset = targetPos - basePos; if (targetPos == NOT_A_DICT_POS) { - offset = DynamicPtReadingUtils::DICT_OFFSET_INVALID; + offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID; } else if (offset == 0) { - offset = DynamicPtReadingUtils::DICT_OFFSET_ZERO_OFFSET; + offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET; } if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) { AKLOGI("offset cannot be written because the offset is too large or too small: %d", diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index 362fbd1cc..af76bc6b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -14,19 +14,19 @@ * limitations under the License. 
*/ -#ifndef LATINIME_DYNAMIC_PT_WRITING_UTILS_H -#define LATINIME_DYNAMIC_PT_WRITING_UTILS_H +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H #include <cstddef> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" namespace latinime { class BufferWithExtendableBuffer; -class DynamicPtWritingUtils { +class DynamicPatriciaTrieWritingUtils { public: static const int NODE_FLAG_FIELD_SIZE; @@ -39,15 +39,8 @@ class DynamicPtWritingUtils { static bool writePtNodeArraySizeAndAdvancePosition(BufferWithExtendableBuffer *const buffer, const size_t arraySize, int *const arraySizeFieldPos); - static bool writeFlags(BufferWithExtendableBuffer *const buffer, - const DynamicPtReadingUtils::NodeFlags nodeFlags, - const int nodeFlagsFieldPos) { - int writingPos = nodeFlagsFieldPos; - return writeFlagsAndAdvancePosition(buffer, nodeFlags, &writingPos); - } - static bool writeFlagsAndAdvancePosition(BufferWithExtendableBuffer *const buffer, - const DynamicPtReadingUtils::NodeFlags nodeFlags, + const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos); static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer, @@ -56,11 +49,14 @@ class DynamicPtWritingUtils { static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer, const int *const codePoints, const int codePointCount, int *const codePointFieldPos); + static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer, + const int probability, int *const probabilityFieldPos); + static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer, const int childrenPosition, int *const childrenPositionFieldPos); private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtWritingUtils); + 
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingUtils); static const size_t MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD; static const size_t MAX_PTNODE_ARRAY_SIZE; @@ -71,9 +67,10 @@ class DynamicPtWritingUtils { static const int MAX_DICT_OFFSET_VALUE; static const int MIN_DICT_OFFSET_VALUE; static const int DICT_OFFSET_NEGATIVE_FLAG; + static const int PROBABILITY_FIELD_SIZE; static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos, int *const offsetFieldPos); }; } // namespace latinime -#endif /* LATINIME_DYNAMIC_PT_WRITING_UTILS_H */ +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index be7a3c228..eb072fbaf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -20,8 +20,6 @@ namespace latinime { // Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; -const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = - "REQUIRES_GERMAN_UMLAUT_PROCESSING"; // TODO: Change attribute string to "IS_DECAYING_DICT". const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; @@ -29,9 +27,6 @@ const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; -// Historical info is information that is needed to support decaying such as timestamp, level and -// count. 
-const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO"; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; @@ -68,11 +63,6 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const { return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); } -bool HeaderPolicy::readRequiresGermanUmlautProcessing() const { - return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, - REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false); -} - bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize) const { @@ -99,12 +89,12 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT if (updatesLastUpdatedTime) { // Set current time as a last updated time. HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY, - TimeKeeper::peekCurrentTime()); + time(0)); } if (updatesLastDecayedTime) { // Set current time as a last updated time. 
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY, - TimeKeeper::peekCurrentTime()); + time(0)); } if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 1208d2c2a..a9c7805a8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,40 +17,37 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H +#include <ctime> #include <stdint.h> #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" -#include "utils/time_keeper.h" namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: // Reads information from existing dictionary buffer. 
- HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion) - : mDictFormatVersion(formatVersion), + HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), - mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), + LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, BIGRAM_COUNT_KEY, 0 /* defaultValue */)), mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), - mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( - &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {} + EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {} // Constructs header information using an attribute map. 
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, @@ -59,55 +56,30 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( attributeMap)), mSize(0), mAttributeMap(*attributeMap), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), - mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), - mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), - mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( - &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {} - - // Temporary dummy header. - HeaderPolicy() - : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0), - mAttributeMap(), mMultiWordCostMultiplier(0.0f), - mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), - mLastUpdatedTime(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), - mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false) {} + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} ~HeaderPolicy() {} - virtual int getFormatVersionNumber() const { - // Conceptually this converts the symbolic value we use in the code into the - // hardcoded of the bytes in the file. But we want the constants to be the - // same so we use them for both here. 
- switch (mDictFormatVersion) { - case FormatUtils::VERSION_2: - return FormatUtils::VERSION_2; - case FormatUtils::VERSION_4: - return FormatUtils::VERSION_4; - default: - return FormatUtils::UNKNOWN_VERSION; - } + AK_FORCE_INLINE int getSize() const { + return mSize; } - AK_FORCE_INLINE bool isValid() const { - // Decaying dictionary must have historical information. - if (!mIsDecayingDict) { - return true; - } - if (mHasHistoricalInfoOfWords) { - return true; - } else { - return false; - } + AK_FORCE_INLINE bool supportsDynamicUpdate() const { + return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags); } - AK_FORCE_INLINE int getSize() const { - return mSize; + AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { + return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags); + } + + AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { + return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags); } AK_FORCE_INLINE float getMultiWordCostMultiplier() const { @@ -118,10 +90,6 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mIsDecayingDict; } - AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { - return mRequiresGermanUmlautProcessing; - } - AK_FORCE_INLINE int getLastUpdatedTime() const { return mLastUpdatedTime; } @@ -142,10 +110,6 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mExtendedRegionSize; } - AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const { - return mHasHistoricalInfoOfWords; - } - void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; @@ -154,17 +118,15 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int unigramCount, const int bigramCount, const int extendedRegionSize) const; private: - DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; - static const char 
*const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; static const char *const IS_DECAYING_DICT_KEY; static const char *const LAST_UPDATED_TIME_KEY; static const char *const LAST_DECAYED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; static const char *const EXTENDED_REGION_SIZE_KEY; - static const char *const HAS_HISTORICAL_INFO_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; @@ -173,17 +135,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mSize; HeaderReadWriteUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; - const bool mRequiresGermanUmlautProcessing; const bool mIsDecayingDict; const int mLastUpdatedTime; const int mLastDecayedTime; const int mUnigramCount; const int mBigramCount; const int mExtendedRegionSize; - const bool mHasHistoricalInfoOfWords; float readMultipleWordCostMultiplier() const; - bool readRequiresGermanUmlautProcessing() const; static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 6b4598642..5ded8f6a1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -35,6 +35,22 @@ const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; +// Flags for special processing +// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or +// something very bad (like, the apocalypse) will happen. Please update both at the same time. 
+const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; + +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. +const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; +const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = + "REQUIRES_GERMAN_UMLAUT_PROCESSING"; +const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY = + "REQUIRES_FRENCH_LIGATURE_PROCESSING"; /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in @@ -52,7 +68,17 @@ const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; /* static */ HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - return NO_FLAGS; + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, + REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false /* defaultValue */); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, + REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, false /* defaultValue */); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, + SUPPORTS_DYNAMIC_UPDATE_KEY, false /* defaultValue */); + DictionaryFlags dictflags = NO_FLAGS; + dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; + dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; + dictflags |= supportsDynamicUpdate ? 
SUPPORTS_DYNAMIC_UPDATE_FLAG : 0; + return dictflags; } /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, @@ -89,8 +115,8 @@ const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; case FormatUtils::VERSION_2: // Version 2 dictionary writing is not supported. return false; - case FormatUtils::VERSION_4: - return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */, + case FormatUtils::VERSION_3: + return buffer->writeUintAndAdvancePosition(3 /* data */, HEADER_DICTIONARY_VERSION_SIZE, writingPos); default: return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index fc24bbdd5..225968323 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -37,6 +37,18 @@ class HeaderReadWriteUtils { static DictionaryFlags getFlags(const uint8_t *const dictBuf); + static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) { + return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0; + } + + static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) { + return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0; + } + + static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) { + return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; + } + static AK_FORCE_INLINE int getHeaderOptionsPosition() { return HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE + HEADER_SIZE_FIELD_SIZE; @@ -89,8 +101,17 @@ class HeaderReadWriteUtils { static const int HEADER_FLAG_SIZE; static const int HEADER_SIZE_FIELD_SIZE; - // Value for the "flags" field. It's unused at the moment. 
static const DictionaryFlags NO_FLAGS; + // Flags for special processing + // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or + // something very bad (like, the apocalypse) will happen. Please update both at the same time. + static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; + static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; + static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; + + static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; + static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; static void setIntAttributeInner(AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, const int value); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 960c1b936..8a84bd261 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -15,22 +15,22 @@ */ -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { -void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, +void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - int nextPos = dicNode->getChildrenPtNodeArrayPos(); + int nextPos = 
dicNode->getChildrenPos(); if (nextPos < 0 || nextPos >= mDictBufferSize) { AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", nextPos, mDictBufferSize); @@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, -// it is possible to check for this with advantageous complexity. For each PtNode array, we search +// it is possible to check for this with advantageous complexity. For each node, we search // for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a // PtNode array with the last PtNode's children position still less than what we are searching for, // we must descend the last PtNode's children (for example, if the word we are searching for starts // with a z, it's the last PtNode of the root array, so all children addresses will be smaller -// than the position we look for, and we have to descend the z PtNode). +// than the position we look for, and we have to descend the z node). /* Parameters : * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) @@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will - // only traverse PtNodes that are actually a part of the terminal we are searching, so each - // time we enter this loop we are one depth level further than last time. 
- // The only reason we count PtNodes is because we want to reduce the probability of infinite + // only traverse nodes that are actually a part of the terminal we are searching, so each time + // we enter this loop we are one depth level further than last time. + // The only reason we count nodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { @@ -140,9 +140,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = true; } else if (1 >= ptNodeCount) { // However if we are on the LAST PtNode of this array, and we have NOT shot the - // position we should descend THIS PtNode. So we trick the - // lastCandidatePtNodePos so that we will descend this PtNode, not the previous - // one. + // position we should descend THIS node. So we trick the lastCandidatePtNodePos + // so that we will descend this PtNode, not the previous one. lastCandidatePtNodePos = startPos; found = true; } else { @@ -150,7 +149,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = false; } } else { - // Even if we don't have children here, we could still be on the last PtNode of + // Even if we don't have children here, we could still be on the last PtNode of / // this array. If this is the case, we should descend the last PtNode that had // children, and their position is already in lastCandidatePtNodePos. found = (1 >= ptNodeCount); @@ -231,9 +230,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( return 0; } -// This function gets the position of the terminal PtNode of the exact matching word in the +// This function gets the position of the terminal node of the exact matching word in the // dictionary. If no match is found, it returns NOT_A_DICT_POS. 
-int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, +int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); int wordPos = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 2adafd22b..0f8662aea 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -24,7 +24,6 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -34,26 +33,28 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - PatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer) - : mMmappedBuffer(mmappedBuffer), - mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_2), - mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()), - mDictBufferSize(mMmappedBuffer.get()->getBufferSize() - - mHeaderPolicy.getSize()), + PatriciaTriePolicy(const MmappedBuffer *const buffer) + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), + mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), + mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} + ~PatriciaTriePolicy() { + delete mBuffer; + } + AK_FORCE_INLINE int getRootPosition() const { return 0; } - void createAndGetAllChildDicNodes(const DicNode *const dicNode, + void 
createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalPtNodePositionOfWord(const int *const inWord, + int getTerminalNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; @@ -76,17 +77,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutListPolicy; } - bool addUnigramWord(const int *const word, const int length, const int probability, - const int *const shortcutTargetCodePoints, const int shortcutLength, - const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted, - const int timestamp) { + bool addUnigramWord(const int *const word, const int length, const int probability) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp) { + const int length1, const int probability) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; @@ -115,7 +113,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } - void getProperty(const char *const query, const int queryLength, char *const outResult, + void getProperty(const char *const query, char *const outResult, const int maxResultLength) { // getProperty is not supported for this class. 
if (maxResultLength > 0) { @@ -123,16 +121,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } } - const UnigramProperty getUnigramProperty(const int *const codePoints, - const int codePointCount) const { - // getUnigramProperty is not supported. - return UnigramProperty(); - } - private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); - const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; + const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; const int mDictBufferSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 82b3593c8..7df55815f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h index b28f58336..8420ee95a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -23,7 +23,6 @@ namespace latinime { -// TODO: Move to pt_common class PatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h new file mode 100644 index 000000000..bd3211f6a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H +#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +/* + * This is a dynamic version of ShortcutListPolicy and supports an additional buffer. + */ +class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy { + public: + explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer) + : mBuffer(buffer) {} + + ~DynamicShortcutListPolicy() {} + + int getStartPos(const int pos) const { + if (pos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize(); + } + + void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + const ShortcutListReadingUtils::ShortcutFlags flags = + ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos); + if (outHasNext) { + *outHasNext = ShortcutListReadingUtils::hasNext(flags); + } + if (outIsWhitelist) { + *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags); + } + if (outCodePoint) { + *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget( + buffer, maxCodePointCount, outCodePoint, pos); + } + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } + } + + void skipAllShortcuts(int *const pos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); + const uint8_t 
*const buffer = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + *pos -= mBuffer->getOriginalBufferSize(); + } + const int shortcutListSize = ShortcutListReadingUtils + ::getShortcutListSizeAndForwardPointer(buffer, pos); + *pos += shortcutListSize; + if (usesAdditionalBuffer) { + *pos += mBuffer->getOriginalBufferSize(); + } + } + + // Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in + // bufferToWrite and advance these positions after the shortcut lists. This returns whether + // the copy was succeeded or not. + bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite, + int *const fromPos, int *const toPos) const { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); + if (usesAdditionalBuffer) { + *fromPos -= mBuffer->getOriginalBufferSize(); + } + const int shortcutListSize = ShortcutListReadingUtils + ::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer), + fromPos); + // Copy shortcut list size. + if (!bufferToWrite->writeUintAndAdvancePosition( + shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(), + ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) { + return false; + } + // Copy shortcut list. 
+ for (int i = 0; i < shortcutListSize; ++i) { + const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), fromPos); + if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) { + return false; + } + } + if (usesAdditionalBuffer) { + *fromPos += mBuffer->getOriginalBufferSize(); + } + return true; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy); + + const BufferWithExtendableBuffer *const mBuffer; +}; +} // namespace latinime +#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h deleted file mode 100644 index ae863af57..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H -#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" - -namespace latinime { - -class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy { - public: - Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent, - const TerminalPositionLookupTable *const terminalPositionLookupTable) - : mShortcutDictContent(shortcutDictContent), - mTerminalPositionLookupTable(terminalPositionLookupTable) {} - - ~Ver4ShortcutListPolicy() {} - - int getStartPos(const int pos) const { - // The first shortcut entry is located at the head position of the shortcut list. - return pos; - } - - void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, - int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, - int *const pos) const { - int probability = 0; - mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount, - outCodePoint, outCodePointCount, &probability, outHasNext, pos); - if (outIsWhitelist) { - *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability); - } - } - - void skipAllShortcuts(int *const pos) const { - // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries. - } - - bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount, - const int probability) { - const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId); - if (shortcutListPos == NOT_A_DICT_POS) { - // Create shortcut list. 
- if (!mShortcutDictContent->createNewShortcutList(terminalId)) { - AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId); - return false; - } - const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId); - return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability, - false /* hasNext */, writingPos); - } - const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos, - codePoints, codePointCount); - if (entryPos == NOT_A_DICT_POS) { - // Add new entry to the shortcut list. - // Create new shortcut list. - if (!mShortcutDictContent->createNewShortcutList(terminalId)) { - AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId); - return false; - } - int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId); - if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints, - codePointCount, probability, true /* hasNext */, &writingPos)) { - AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId, - writingPos); - return false; - } - return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos); - } - // Overwrite existing entry. - bool hasNext = false; - mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */, - 0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos); - if (!mShortcutDictContent->writeShortcutEntry(codePoints, - codePointCount, probability, hasNext, entryPos)) { - AKLOGE("Cannot overwrite shortcut entry. 
terminal id: %d, pos: %d", terminalId, - entryPos); - return false; - } - return true; - } - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy); - - ShortcutDictContent *const mShortcutDictContent; - const TerminalPositionLookupTable *const mTerminalPositionLookupTable; -}; -} // namespace latinime -#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp deleted file mode 100644 index c81c61d23..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" - -#include <climits> -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/utils/file_utils.h" -#include "suggest/policyimpl/dictionary/utils/format_utils.h" -#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" - -namespace latinime { - -/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr - DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructureWithBufferPolicy(const char *const path, - const int bufOffset, const int size, const bool isUpdatable) { - if (FileUtils::existsDir(path)) { - // Given path represents a directory. - return newPolicyforDirectoryDict(path, isUpdatable); - } else { - if (isUpdatable) { - AKLOGE("One file dictionaries don't support updating. path: %s", path); - ASSERT(false); - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); - } - return newPolicyforFileDict(path, bufOffset, size); - } -} - -/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr - DictionaryStructureWithBufferPolicyFactory::newPolicyforDirectoryDict( - const char *const path, const bool isUpdatable) { - const int headerFilePathBufSize = PATH_MAX + 1 /* terminator */; - char headerFilePath[headerFilePathBufSize]; - getHeaderFilePathInDictDir(path, headerFilePathBufSize, headerFilePath); - // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of - // MmappedBufferPtr if the instance has the responsibility. 
- MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(headerFilePath, - isUpdatable); - if (!mmappedBuffer.get()) { - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); - } - switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(), - mmappedBuffer.get()->getBufferSize())) { - case FormatUtils::VERSION_2: - AKLOGE("Given path is a directory but the format is version 2. path: %s", path); - break; - case FormatUtils::VERSION_4: { - const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */; - char dictPath[dictDirPathBufSize]; - if (!FileUtils::getFilePathWithoutSuffix(headerFilePath, - Ver4DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) { - AKLOGE("Dictionary file name is not valid as a ver4 dictionary. path: %s", path); - ASSERT(false); - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); - } - const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = - Ver4DictBuffers::openVer4DictBuffers(dictPath, mmappedBuffer); - if (!dictBuffers.get()->isValid()) { - AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s", - path); - ASSERT(false); - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); - } - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( - new Ver4PatriciaTriePolicy(dictBuffers)); - } - default: - AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path); - break; - } - ASSERT(false); - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); -} - -/* static */ DictionaryStructureWithBufferPolicy::StructurePolicyPtr - DictionaryStructureWithBufferPolicyFactory::newPolicyforFileDict( - const char *const path, const int bufOffset, const int size) { - // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of - // MmappedBufferPtr if the instance has the responsibility. 
- MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset, - size, false /* isUpdatable */); - if (!mmappedBuffer.get()) { - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); - } - switch (FormatUtils::detectFormatVersion(mmappedBuffer.get()->getBuffer(), - mmappedBuffer.get()->getBufferSize())) { - case FormatUtils::VERSION_2: - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( - new PatriciaTriePolicy(mmappedBuffer)); - case FormatUtils::VERSION_4: - AKLOGE("Given path is a file but the format is version 4. path: %s", path); - break; - default: - AKLOGE("DICT: dictionary format is unknown, bad magic number. path: %s", path); - break; - } - ASSERT(false); - return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(0); -} - -/* static */ void DictionaryStructureWithBufferPolicyFactory::getHeaderFilePathInDictDir( - const char *const dictDirPath, const int outHeaderFileBufSize, - char *const outHeaderFilePath) { - const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */; - char dictName[dictNameBufSize]; - FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName); - snprintf(outHeaderFilePath, outHeaderFileBufSize, "%s/%s%s", dictDirPath, - dictName, Ver4DictConstants::HEADER_FILE_EXTENSION); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp deleted file mode 100644 index 8f42df6d2..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h" - -#include "suggest/core/policy/dictionary_header_structure_policy.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" - -namespace latinime { - -bool DynamicPtGcEventListeners - ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless - // children. - bool isUselessPtNode = !ptNodeParams->isTerminal(); - if (ptNodeParams->isTerminal()) { - bool needsToKeepPtNode = true; - if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams, - &needsToKeepPtNode)) { - AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC."); - return false; - } - if (!needsToKeepPtNode) { - isUselessPtNode = true; - } - } - if (mChildrenValue > 0) { - isUselessPtNode = false; - } else if (ptNodeParams->isTerminal()) { - // Remove children as all children are useless. - if (!mPtNodeWriter->updateChildrenPosition(ptNodeParams, - NOT_A_DICT_POS /* newChildrenPosition */)) { - return false; - } - } - if (isUselessPtNode) { - // Current PtNode is no longer needed. Mark it as deleted. 
- if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) { - return false; - } - } else { - mValueStack.back() += 1; - if (ptNodeParams->isTerminal()) { - mValidUnigramCount += 1; - } - } - return true; -} - -bool DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability - ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) { - int bigramEntryCount = 0; - if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams, - &bigramEntryCount)) { - return false; - } - mValidBigramEntryCount += bigramEntryCount; - } - return true; -} - -// Writes dummy PtNode array size when the head of PtNode array is read. -bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - ::onDescend(const int ptNodeArrayPos) { - mValidPtNodeCount = 0; - int writingPos = mBufferToWrite->getTailPosition(); - mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert( - PtNodeWriter::PtNodeArrayPositionRelocationMap::value_type(ptNodeArrayPos, writingPos)); - // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes. - // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count. - mPtNodeArraySizeFieldPos = writingPos; - return DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition( - mBufferToWrite, 0 /* arraySize */, &writingPos); -} - -// Write PtNode array terminal and actual PtNode array size. -bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - ::onReadingPtNodeArrayTail() { - int writingPos = mBufferToWrite->getTailPosition(); - // Write PtNode array terminal. - if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition( - mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { - return false; - } - // Write actual PtNode array size. 
- if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition( - mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) { - return false; - } - return true; -} - -// Write valid PtNode to buffer and memorize mapping from the old position to the new position. -bool DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - if (ptNodeParams->isDeleted()) { - // Current PtNode is not written in new buffer because it has been deleted. - mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( - PtNodeWriter::PtNodePositionRelocationMap::value_type( - ptNodeParams->getHeadPos(), NOT_A_DICT_POS)); - return true; - } - int writingPos = mBufferToWrite->getTailPosition(); - mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( - PtNodeWriter::PtNodePositionRelocationMap::value_type( - ptNodeParams->getHeadPos(), writingPos)); - mValidPtNodeCount++; - // Writes current PtNode. - return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos); -} - -bool DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields - ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - // Updates parent position. 
- int bigramCount = 0; - if (!mPtNodeWriter->updateAllPositionFields(ptNodeParams, mDictPositionRelocationMap, - &bigramCount)) { - return false; - } - mBigramCount += bigramCount; - if (ptNodeParams->isTerminal()) { - mUnigramCount++; - } - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp deleted file mode 100644 index 2457b49c8..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" - -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; - -bool DynamicPtUpdatingHelper::addUnigramWord( - DynamicPtReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability, - const bool isNotAWord, const bool isBlacklisted, const int timestamp, - bool *const outAddedNewUnigram) { - int parentPos = NOT_A_DICT_POS; - while (!readingHelper->isEnd()) { - const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); - if (!ptNodeParams.isValid()) { - break; - } - const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); - if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */, - wordCodePoints[matchedCodePointCount])) { - // The first code point is different from target code point. Skip this node and read - // the next sibling node. - readingHelper->readNextSiblingNode(ptNodeParams); - continue; - } - // Check following merged node code points. 
- const int nodeCodePointCount = ptNodeParams.getCodePointCount(); - for (int j = 1; j < nodeCodePointCount; ++j) { - const int nextIndex = matchedCodePointCount + j; - if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, - wordCodePoints[matchedCodePointCount + j])) { - *outAddedNewUnigram = true; - return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted, - probability, timestamp, wordCodePoints + matchedCodePointCount, - codePointCount - matchedCodePointCount); - } - } - // All characters are matched. - if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { - return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability, - timestamp, outAddedNewUnigram); - } - if (!ptNodeParams.hasChildren()) { - *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, - isNotAWord, isBlacklisted, probability, timestamp, - wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), - codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); - } - // Advance to the children nodes. - parentPos = ptNodeParams.getHeadPos(); - readingHelper->readChildNode(ptNodeParams); - } - if (readingHelper->isError()) { - // The dictionary is invalid. 
- return false; - } - int pos = readingHelper->getPosOfLastForwardLinkField(); - *outAddedNewUnigram = true; - return createAndInsertNodeIntoPtNodeArray(parentPos, - wordCodePoints + readingHelper->getPrevTotalCodePointCount(), - codePointCount - readingHelper->getPrevTotalCodePointCount(), - isNotAWord, isBlacklisted, probability, timestamp, &pos); -} - -bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, - const int probability, const int timestamp, bool *const outAddedNewBigram) { - const PtNodeParams sourcePtNodeParams( - mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); - const PtNodeParams targetPtNodeParams( - mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos)); - return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability, - timestamp, outAddedNewBigram); -} - -// Remove a bigram relation from word0Pos to word1Pos. -bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { - const PtNodeParams sourcePtNodeParams( - mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); - const PtNodeParams targetPtNodeParams( - mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos)); - return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams); -} - -bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, - const int *const targetCodePoints, const int targetCodePointCount, - const int shortcutProbability) { - const PtNodeParams ptNodeParams(mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(wordPos)); - return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount, - shortcutProbability); -} - -bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, - const int *const nodeCodePoints, const int nodeCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp, int *const forwardLinkFieldPos) { - const 
int newPtNodeArrayPos = mBuffer->getTailPosition(); - if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, - newPtNodeArrayPos, forwardLinkFieldPos)) { - return false; - } - return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, - isNotAWord, isBlacklisted, probability, timestamp); -} - -bool DynamicPtUpdatingHelper::setPtNodeProbability( - const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, - bool *const outAddedNewUnigram) { - if (originalPtNodeParams->isTerminal()) { - // Overwrites the probability. - *outAddedNewUnigram = false; - return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp); - } else { - // Make the node terminal and write the probability. - *outAddedNewUnigram = true; - const int movedPos = mBuffer->getTailPosition(); - int writingPos = movedPos; - const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, - isNotAWord, isBlacklisted, true /* isTerminal */, - originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), - originalPtNodeParams->getCodePoints(), probability)); - if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, - timestamp, &writingPos)) { - return false; - } - if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { - return false; - } - } - return true; -} - -bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( - const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, - const int *const codePoints, const int codePointCount) { - const int newPtNodeArrayPos = mBuffer->getTailPosition(); - if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { - return false; - } - return 
createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, - codePointCount, isNotAWord, isBlacklisted, probability, timestamp); -} - -bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( - const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp) { - int writingPos = mBuffer->getTailPosition(); - if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, - 1 /* arraySize */, &writingPos)) { - return false; - } - const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( - isNotAWord, isBlacklisted, true /* isTerminal */, - parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); - if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, - &writingPos)) { - return false; - } - if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, - NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { - return false; - } - return true; -} - -// Returns whether the dictionary updating was succeeded or not. -bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( - const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, - const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) { - // When addsExtraChild is true, split the reallocating PtNode and add new child. - // Reallocating PtNode: abcde, newNode: abcxy. - // abc (1st, not terminal) __ de (2nd) - // \_ xy (extra child, terminal) - // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. - // Reallocating PtNode: abcde, newNode: abc. 
- // abc (1st, terminal) __ de (2nd) - const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; - const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); - int writingPos = firstPartOfReallocatedPtNodePos; - // Write the 1st part of the reallocating node. The children position will be updated later - // with actual children position. - if (addsExtraChild) { - const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( - false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */, - reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, - reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY)); - if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { - return false; - } - } else { - const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( - isNotAWord, isBlacklisted, true /* isTerminal */, - reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, - reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); - if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, - timestamp, &writingPos)) { - return false; - } - } - const int actualChildrenPos = writingPos; - // Create new children PtNode array. - const size_t newPtNodeCount = addsExtraChild ? 2 : 1; - if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, - newPtNodeCount, &writingPos)) { - return false; - } - // Write the 2nd part of the reallocating node. 
- const int secondPartOfReallocatedPtNodePos = writingPos; - const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, - reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(), - reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos, - reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, - reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, - reallocatingPtNodeParams->getProbability())); - if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) { - return false; - } - if (addsExtraChild) { - const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( - isNotAWord, isBlacklisted, true /* isTerminal */, - firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, - newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); - if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, - timestamp, &writingPos)) { - return false; - } - } - if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, - NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { - return false; - } - // Update original reallocating PtNode as moved. - if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, - secondPartOfReallocatedPtNodePos)) { - return false; - } - // Load node info. Information of the 1st part will be fetched. - const PtNodeParams ptNodeParams( - mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); - // Update children position. 
- return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos); -} - -const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( - const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const bool isTerminal, const int parentPos, - const int codePointCount, const int *const codePoints, const int probability) const { - const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( - isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(), - originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */, - CHILDREN_POSITION_FIELD_SIZE); - return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, - probability); -} - -const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode( - const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, - const int parentPos, const int codePointCount, const int *const codePoints, - const int probability) const { - const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( - isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, - false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, - CHILDREN_POSITION_FIELD_SIZE); - return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h deleted file mode 100644 index 71f473096..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DYNAMIC_PT_UPDATING_HELPER_H -#define LATINIME_DYNAMIC_PT_UPDATING_HELPER_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" -#include "utils/hash_map_compat.h" - -namespace latinime { - -class BufferWithExtendableBuffer; -class DynamicPtReadingHelper; -class PtNodeReader; -class PtNodeWriter; - -class DynamicPtUpdatingHelper { - public: - DynamicPtUpdatingHelper(BufferWithExtendableBuffer *const buffer, - const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter) - : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {} - - ~DynamicPtUpdatingHelper() {} - - // Add a word to the dictionary. If the word already exists, update the probability. - bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability, - const bool isNotAWord, const bool isBlacklisted, const int timestamp, - bool *const outAddedNewUnigram); - - // Add a bigram relation from word0Pos to word1Pos. - bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, - const int timestamp, bool *const outAddedNewBigram); - - // Remove a bigram relation from word0Pos to word1Pos. - bool removeBigramWords(const int word0Pos, const int word1Pos); - - // Add a shortcut target. 
- bool addShortcutTarget(const int wordPos, const int *const targetCodePoints, - const int targetCodePointCount, const int shortcutProbability); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper); - - static const int CHILDREN_POSITION_FIELD_SIZE; - - BufferWithExtendableBuffer *const mBuffer; - const PtNodeReader *const mPtNodeReader; - PtNodeWriter *const mPtNodeWriter; - - bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, - const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, - const int probability, const int timestamp, int *const forwardLinkFieldPos); - - bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, - bool *const outAddedNewUnigram); - - bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, - const bool isNotAWord, const bool isBlacklisted, const int probability, - const int timestamp, const int *const codePoints, const int codePointCount); - - bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, - const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, - const int probability, const int timestamp); - - bool reallocatePtNodeAndAddNewPtNodes( - const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, - const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, - const int timestamp, const int *const newNodeCodePoints, - const int newNodeCodePointCount); - - const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, - const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, - const int parentPos, const int codePointCount, - const int *const codePoints, const int probability) const; - - const PtNodeParams getPtNodeParamsForNewPtNode(const bool 
isNotAWord, const bool isBlacklisted, - const bool isTerminal, const int parentPos, - const int codePointCount, const int *const codePoints, const int probability) const; -}; -} // namespace latinime -#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h deleted file mode 100644 index 84731eb17..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_PT_NODE_PARAMS_H -#define LATINIME_PT_NODE_PARAMS_H - -#include <cstring> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" - -namespace latinime { - -// This class has information of a PtNode. This class is immutable. -class PtNodeParams { - public: - // Invalid PtNode. 
- PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), - mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), - mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS), - mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), - mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_DICT_POS) {} - - PtNodeParams(const PtNodeParams& ptNodeParams) - : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags), - mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount), - mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos), - mTerminalId(ptNodeParams.mTerminalId), - mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos), - mProbability(ptNodeParams.mProbability), - mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos), - mChildrenPos(ptNodeParams.mChildrenPos), - mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos), - mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos), - mSiblingPos(ptNodeParams.mSiblingPos) { - memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); - } - - // PtNode with a terminal id. 
- PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, - const int parentPos, const int codePointCount, const int *const codePoints, - const int terminalIdFieldPos, const int terminalId, const int probability, - const int childrenPosFieldPos, const int childrenPos, const int siblingPos) - : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), - mCodePointCount(codePointCount), mCodePoints(), - mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId), - mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), - mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), - mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId), - mBigramPos(terminalId), mSiblingPos(siblingPos) { - memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); - } - - // Construct new params by updating existing PtNode params. - PtNodeParams(const PtNodeParams *const ptNodeParams, - const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, - const int codePointCount, const int *const codePoints, const int probability) - : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos), - mCodePointCount(codePointCount), mCodePoints(), - mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()), - mTerminalId(ptNodeParams->getTerminalId()), - mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()), - mProbability(probability), - mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()), - mChildrenPos(ptNodeParams->getChildrenPos()), - mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()), - mShortcutPos(ptNodeParams->getShortcutPos()), - mBigramPos(ptNodeParams->getBigramsPos()), - mSiblingPos(ptNodeParams->getSiblingNodePos()) { - memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); - } - - PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, - const int codePointCount, const int *const codePoints, const int probability) - : 
mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos), - mCodePointCount(codePointCount), mCodePoints(), - mTerminalIdFieldPos(NOT_A_DICT_POS), - mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), - mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), - mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), - mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), - mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) { - memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); - } - - AK_FORCE_INLINE bool isValid() const { - return mCodePointCount > 0; - } - - // Head position of the PtNode - AK_FORCE_INLINE int getHeadPos() const { - return mHeadPos; - } - - // Flags - AK_FORCE_INLINE bool isDeleted() const { - return DynamicPtReadingUtils::isDeleted(mFlags); - } - - AK_FORCE_INLINE bool willBecomeNonTerminal() const { - return DynamicPtReadingUtils::willBecomeNonTerminal(mFlags); - } - - AK_FORCE_INLINE bool hasChildren() const { - return mChildrenPos != NOT_A_DICT_POS; - } - - AK_FORCE_INLINE bool isTerminal() const { - return PatriciaTrieReadingUtils::isTerminal(mFlags); - } - - AK_FORCE_INLINE bool isBlacklisted() const { - return PatriciaTrieReadingUtils::isBlacklisted(mFlags); - } - - AK_FORCE_INLINE bool isNotAWord() const { - return PatriciaTrieReadingUtils::isNotAWord(mFlags); - } - - AK_FORCE_INLINE bool hasBigrams() const { - return PatriciaTrieReadingUtils::hasBigrams(mFlags); - } - - AK_FORCE_INLINE bool hasShortcutTargets() const { - return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); - } - - // Parent node position - AK_FORCE_INLINE int getParentPos() const { - return mParentPos; - } - - // Number of code points - AK_FORCE_INLINE uint8_t getCodePointCount() const { - return mCodePointCount; - } - - AK_FORCE_INLINE const int *getCodePoints() const { - return mCodePoints; - } - - // Probability - AK_FORCE_INLINE int getTerminalIdFieldPos() const { - return mTerminalIdFieldPos; - } - - 
AK_FORCE_INLINE int getTerminalId() const { - return mTerminalId; - } - - // Probability - AK_FORCE_INLINE int getProbabilityFieldPos() const { - return mProbabilityFieldPos; - } - - AK_FORCE_INLINE int getProbability() const { - return mProbability; - } - - // Children PtNode array position - AK_FORCE_INLINE int getChildrenPosFieldPos() const { - return mChildrenPosFieldPos; - } - - AK_FORCE_INLINE int getChildrenPos() const { - return mChildrenPos; - } - - // Bigram linked node position. - AK_FORCE_INLINE int getBigramLinkedNodePos() const { - return mBigramLinkedNodePos; - } - - // Shortcutlist position - AK_FORCE_INLINE int getShortcutPos() const { - return mShortcutPos; - } - - // Bigrams position - AK_FORCE_INLINE int getBigramsPos() const { - return mBigramPos; - } - - // Sibling node position - AK_FORCE_INLINE int getSiblingNodePos() const { - return mSiblingPos; - } - - private: - // This class have a public copy constructor to be used as a return value. - - // Disallowing the assignment operator. 
- PtNodeParams &operator=(PtNodeParams &ptNodeParams); - - const int mHeadPos; - const PatriciaTrieReadingUtils::NodeFlags mFlags; - const int mParentPos; - const uint8_t mCodePointCount; - int mCodePoints[MAX_WORD_LENGTH]; - const int mTerminalIdFieldPos; - const int mTerminalId; - const int mProbabilityFieldPos; - const int mProbability; - const int mChildrenPosFieldPos; - const int mChildrenPos; - const int mBigramLinkedNodePos; - const int mShortcutPos; - const int mBigramPos; - const int mSiblingPos; -}; -} // namespace latinime -#endif /* LATINIME_PT_NODE_PARAMS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h deleted file mode 100644 index c6b2a8bed..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_PT_NODE_READER_H -#define LATINIME_PT_NODE_READER_H - -#include "defines.h" - -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" - -namespace latinime { - -// Interface class used to read PtNode information. 
-class PtNodeReader { - public: - virtual ~PtNodeReader() {} - virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0; - - protected: - PtNodeReader() {}; - - private: - DISALLOW_COPY_AND_ASSIGN(PtNodeReader); -}; -} // namespace latinime -#endif /* LATINIME_PT_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h deleted file mode 100644 index 84dd6870e..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_PT_NODE_WRITER_H -#define LATINIME_PT_NODE_WRITER_H - -#include "defines.h" - -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" -#include "utils/hash_map_compat.h" - -namespace latinime { - -// Interface class used to write PtNode information. 
-class PtNodeWriter { - public: - typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap; - typedef hash_map_compat<int, int> PtNodePositionRelocationMap; - struct DictPositionRelocationMap { - public: - DictPositionRelocationMap() - : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {} - - PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap; - PtNodePositionRelocationMap mPtNodePositionRelocationMap; - - private: - DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); - }; - - virtual ~PtNodeWriter() {} - - virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; - - virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int movedPos, const int bigramLinkedNodePos) = 0; - - virtual bool markPtNodeAsWillBecomeNonTerminal( - const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; - - virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int probability, const int timestamp) = 0; - - virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( - const PtNodeParams *const toBeUpdatedPtNodeParams, - bool *const outNeedsToKeepPtNode) = 0; - - virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int newChildrenPosition) = 0; - - virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, - int *const ptNodeWritingPos) = 0; - - virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, - const int timestamp, int *const ptNodeWritingPos) = 0; - - virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, - bool *const outAddedNewBigram) = 0; - - virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam) = 0; - - virtual bool 
updateAllBigramEntriesAndDeleteUselessEntries( - const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0; - - virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams, - const DictPositionRelocationMap *const dictPositionRelocationMap, - int *const outBigramEntryCount) = 0; - - virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams, - const int *const targetCodePoints, const int targetCodePointCount, - const int shortcutProbability) = 0; - - protected: - PtNodeWriter() {}; - - private: - DISALLOW_COPY_AND_ASSIGN(PtNodeWriter); -}; -} // namespace latinime -#endif /* LATINIME_PT_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp deleted file mode 100644 index cb9d450ec..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" - -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( - int *const bigramEntryPos) const { - const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); - const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); - const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; - int probability = NOT_A_PROBABILITY; - int timestamp = NOT_A_TIMESTAMP; - int level = 0; - int count = 0; - if (mHasHistoricalInfo) { - probability = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos); - timestamp = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos); - level = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos); - count = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos); - } else { - probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; - } - const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); - const int targetTerminalId = - (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? 
- Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId; - if (mHasHistoricalInfo) { - const HistoricalInfo historicalInfo(timestamp, level, count); - return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId); - } else { - return BigramEntry(hasNext, probability, targetTerminalId); - } -} - -bool BigramDictContent::writeBigramEntryAndAdvancePosition( - const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { - BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); - const int bigramFlags = createAndGetBigramFlags( - mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(), - bigramEntryToWrite->hasNext()); - if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, - Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); - return false; - } - if (mHasHistoricalInfo) { - if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(), - Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos, - bigramEntryToWrite->getProbability()); - return false; - } - const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo(); - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos, - historicalInfo->getTimeStamp()); - return false; - } - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(), - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram level. 
pos: %d, level: %d", *entryWritingPos, - historicalInfo->getLevel()); - return false; - } - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(), - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos, - historicalInfo->getCount()); - return false; - } - } - const int targetTerminalIdToWrite = - (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ? - Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : - bigramEntryToWrite->getTargetTerminalId(); - if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) { - AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d", - *entryWritingPos, bigramEntryToWrite->getTargetTerminalId()); - return false; - } - return true; -} - -bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) { - int readingPos = bigramListPos; - int writingPos = toPos; - bool hasNext = true; - while (hasNext) { - const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); - if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) { - AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos); - return false; - } - } - return true; -} - -bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const BigramDictContent *const originalBigramDictContent, - int *const outBigramEntryCount) { - for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); - it != terminalIdMap->end(); ++it) { - const int originalBigramListPos = - originalBigramDictContent->getBigramListHeadPos(it->first); - if (originalBigramListPos == NOT_A_DICT_POS) { - // This terminal does not have a bigram list. 
- continue; - } - const int bigramListPos = getContentBuffer()->getTailPosition(); - int bigramEntryCount = 0; - // Copy bigram list with GC from original content. - if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos, - terminalIdMap, &bigramEntryCount)) { - AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d", - originalBigramListPos, bigramListPos); - return false; - } - if (bigramEntryCount == 0) { - // All bigram entries are useless. This terminal does not have a bigram list. - continue; - } - *outBigramEntryCount += bigramEntryCount; - // Set bigram list position to the lookup table. - if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) { - AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d", - it->second, bigramListPos); - return false; - } - } - return true; -} - -// Returns whether GC for the bigram list was succeeded or not. -bool BigramDictContent::runGCBigramList(const int bigramListPos, - const BigramDictContent *const sourceBigramDictContent, const int toPos, - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - int *const outEntrycount) { - bool hasNext = true; - int readingPos = bigramListPos; - int writingPos = toPos; - int lastEntryPos = NOT_A_DICT_POS; - while (hasNext) { - const BigramEntry originalBigramEntry = - sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); - hasNext = originalBigramEntry.hasNext(); - if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) { - continue; - } - TerminalPositionLookupTable::TerminalIdMap::const_iterator it = - terminalIdMap->find(originalBigramEntry.getTargetTerminalId()); - if (it == terminalIdMap->end()) { - // Target word has been removed. - continue; - } - lastEntryPos = hasNext ? 
writingPos : NOT_A_DICT_POS; - const BigramEntry updatedBigramEntry = - originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second); - if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) { - AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); - return false; - } - *outEntrycount += 1; - } - if (lastEntryPos != NOT_A_DICT_POS) { - // Update has next flag in the last written entry. - const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry( - false /* hasNext */); - if (!writeBigramEntry(&bigramEntry, lastEntryPos)) { - AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos); - return false; - } - } - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h deleted file mode 100644 index ba2a05209..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_BIGRAM_DICT_CONTENT_H -#define LATINIME_BIGRAM_DICT_CONTENT_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" - -namespace latinime { - -class BigramDictContent : public SparseTableDictContent { - public: - BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo, - const bool isUpdatable) - : SparseTableDictContent(dictPath, - Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), - mHasHistoricalInfo(hasHistoricalInfo) {} - - BigramDictContent(const bool hasHistoricalInfo) - : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), - mHasHistoricalInfo(hasHistoricalInfo) {} - - const BigramEntry getBigramEntry(const int bigramEntryPos) const { - int readingPos = bigramEntryPos; - return getBigramEntryAndAdvancePosition(&readingPos); - } - - const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const; - - // Returns head position of bigram list for a PtNode specified by terminalId. 
- int getBigramListHeadPos(const int terminalId) const { - const SparseTable *const addressLookupTable = getAddressLookupTable(); - if (!addressLookupTable->contains(terminalId)) { - return NOT_A_DICT_POS; - } - return addressLookupTable->get(terminalId); - } - - bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) { - int writingPos = entryWritingPos; - return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); - } - - bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, - int *const entryWritingPos); - - bool createNewBigramList(const int terminalId) { - const int bigramListPos = getContentBuffer()->getTailPosition(); - return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); - } - - bool copyBigramList(const int bigramListPos, const int toPos); - - bool flushToFile(const char *const dictPath) const { - return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_FILE_EXTENSION); - } - - bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const BigramDictContent *const originalBigramDictContent, - int *const outBigramEntryCount); - - private: - DISALLOW_COPY_AND_ASSIGN(BigramDictContent); - - int createAndGetBigramFlags(const int probability, const bool hasNext) const { - return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) - | (hasNext ? 
Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0); - } - - bool runGCBigramList(const int bigramListPos, - const BigramDictContent *const sourceBigramDictContent, const int toPos, - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - int *const outEntryCount); - - bool mHasHistoricalInfo; -}; -} // namespace latinime -#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h deleted file mode 100644 index 2b0cbd93b..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_BIGRAM_ENTRY_H -#define LATINIME_BIGRAM_ENTRY_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" - -namespace latinime { - -class BigramEntry { - public: - BigramEntry(const BigramEntry& bigramEntry) - : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability), - mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {} - - // Entry with historical information. 
- BigramEntry(const bool hasNext, const int probability, const int targetTerminalId) - : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(), - mTargetTerminalId(targetTerminalId) {} - - // Entry with historical information. - BigramEntry(const bool hasNext, const int probability, - const HistoricalInfo *const historicalInfo, const int targetTerminalId) - : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo), - mTargetTerminalId(targetTerminalId) {} - - const BigramEntry getInvalidatedEntry() const { - return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID); - } - - const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const { - return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId); - } - - const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const { - return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId); - } - - const BigramEntry updateProbabilityAndGetEntry(const int probability) const { - return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId); - } - - const BigramEntry updateHistoricalInfoAndGetEntry( - const HistoricalInfo *const historicalInfo) const { - return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId); - } - - bool isValid() const { - return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; - } - - bool hasNext() const { - return mHasNext; - } - - int getProbability() const { - return mProbability; - } - - bool hasHistoricalInfo() const { - return mHistoricalInfo.isValid(); - } - - const HistoricalInfo *getHistoricalInfo() const { - return &mHistoricalInfo; - } - - int getTargetTerminalId() const { - return mTargetTerminalId; - } - - private: - // Copy constructor is public to use this class as a type of return value. 
- DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry); - DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry); - - const bool mHasNext; - const int mProbability; - const HistoricalInfo mHistoricalInfo; - const int mTargetTerminalId; -}; -} // namespace latinime -#endif /* LATINIME_BIGRAM_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h deleted file mode 100644 index 0c2f47073..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_DICT_CONTENT_H -#define LATINIME_DICT_CONTENT_H - -#include "defines.h" - -namespace latinime { - -class DictContent { - public: - virtual ~DictContent() {} - virtual bool isValid() const = 0; - - protected: - DictContent() {} - - private: - DISALLOW_COPY_AND_ASSIGN(DictContent); -}; -} // namespace latinime -#endif /* LATINIME_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp deleted file mode 100644 index 3b7c70efd..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" - -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const { - if (terminalId < 0 || terminalId >= mSize) { - // This method can be called with invalid terminal id during GC. - return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY); - } - const BufferWithExtendableBuffer *const buffer = getBuffer(); - int entryPos = getEntryPos(terminalId); - const int flags = buffer->readUintAndAdvancePosition( - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos); - const int probability = buffer->readUintAndAdvancePosition( - Ver4DictConstants::PROBABILITY_SIZE, &entryPos); - if (mHasHistoricalInfo) { - const int timestamp = buffer->readUintAndAdvancePosition( - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos); - const int level = buffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos); - const int count = buffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos); - const HistoricalInfo historicalInfo(timestamp, level, count); - return ProbabilityEntry(flags, probability, &historicalInfo); - } else { - return ProbabilityEntry(flags, probability); - } -} - -bool ProbabilityDictContent::setProbabilityEntry(const int terminalId, - const ProbabilityEntry *const probabilityEntry) { - if (terminalId < 0) { - return false; - } - const int entryPos = getEntryPos(terminalId); - if (terminalId >= mSize) { - ProbabilityEntry dummyEntry; - // Write new entry. 
- int writingPos = getBuffer()->getTailPosition(); - while (writingPos <= entryPos) { - // Fulfilling with dummy entries until writingPos. - if (!writeEntry(&dummyEntry, writingPos)) { - AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize); - return false; - } - writingPos += getEntrySize(); - mSize++; - } - } - return writeEntry(probabilityEntry, entryPos); -} - -bool ProbabilityDictContent::flushToFile(const char *const dictPath) const { - if (getEntryPos(mSize) < getBuffer()->getTailPosition()) { - ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo); - for (int i = 0; i < mSize; ++i) { - const ProbabilityEntry probabilityEntry = getProbabilityEntry(i); - if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) { - AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i); - return false; - } - } - return probabilityDictContentToWrite.flush(dictPath, - Ver4DictConstants::FREQ_FILE_EXTENSION); - } else { - return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION); - } -} - -bool ProbabilityDictContent::runGC( - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ProbabilityDictContent *const originalProbabilityDictContent) { - mSize = 0; - for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); - it != terminalIdMap->end(); ++it) { - const ProbabilityEntry probabilityEntry = - originalProbabilityDictContent->getProbabilityEntry(it->first); - if (!setProbabilityEntry(it->second, &probabilityEntry)) { - AKLOGE("Cannot set probability entry in runGC. 
terminalId: %d", it->second); - return false; - } - mSize++; - } - return true; -} - -int ProbabilityDictContent::getEntrySize() const { - if (mHasHistoricalInfo) { - return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE - + Ver4DictConstants::TIME_STAMP_FIELD_SIZE - + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE - + Ver4DictConstants::WORD_COUNT_FIELD_SIZE; - } else { - return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE; - } -} - -int ProbabilityDictContent::getEntryPos(const int terminalId) const { - return terminalId * getEntrySize(); -} - -bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry, - const int entryPos) { - BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer(); - int writingPos = entryPos; - if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(), - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) { - AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(), - Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) { - AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos); - return false; - } - if (mHasHistoricalInfo) { - const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo(); - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(), - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write level in probability dict content. 
pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(), - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos); - return false; - } - } - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h deleted file mode 100644 index b065bc954..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H -#define LATINIME_PROBABILITY_DICT_CONTENT_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -class ProbabilityEntry; - -class ProbabilityDictContent : public SingleDictContent { - public: - ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo, - const bool isUpdatable) - : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable), - mHasHistoricalInfo(hasHistoricalInfo), - mSize(getBuffer()->getTailPosition() / getEntrySize()) {} - - ProbabilityDictContent(const bool hasHistoricalInfo) - : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {} - - const ProbabilityEntry getProbabilityEntry(const int terminalId) const; - - bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry); - - bool flushToFile(const char *const dictPath) const; - - bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ProbabilityDictContent *const originalProbabilityDictContent); - - private: - DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); - - int getEntrySize() const; - - int getEntryPos(const int terminalId) const; - - bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos); - - bool mHasHistoricalInfo; - int mSize; -}; -} // namespace latinime -#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h deleted file mode 100644 index 36ba82be1..000000000 --- 
a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_PROBABILITY_ENTRY_H -#define LATINIME_PROBABILITY_ENTRY_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" - -namespace latinime { - -class ProbabilityEntry { - public: - ProbabilityEntry(const ProbabilityEntry &probabilityEntry) - : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability), - mHistoricalInfo(probabilityEntry.mHistoricalInfo) {} - - // Dummy entry - ProbabilityEntry() - : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {} - - // Entry without historical information - ProbabilityEntry(const int flags, const int probability) - : mFlags(flags), mProbability(probability), mHistoricalInfo() {} - - // Entry with historical information. 
- ProbabilityEntry(const int flags, const int probability, - const HistoricalInfo *const historicalInfo) - : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {} - - const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const { - return ProbabilityEntry(mFlags, probability, &mHistoricalInfo); - } - - const ProbabilityEntry createEntryWithUpdatedHistoricalInfo( - const HistoricalInfo *const historicalInfo) const { - return ProbabilityEntry(mFlags, mProbability, historicalInfo); - } - - bool hasHistoricalInfo() const { - return mHistoricalInfo.isValid(); - } - - int getFlags() const { - return mFlags; - } - - int getProbability() const { - return mProbability; - } - - const HistoricalInfo *getHistoricalInfo() const { - return &mHistoricalInfo; - } - - private: - // Copy constructor is public to use this class as a type of return value. - DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry); - - const int mFlags; - const int mProbability; - const HistoricalInfo mHistoricalInfo; -}; -} // namespace latinime -#endif /* LATINIME_PROBABILITY_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp deleted file mode 100644 index 29972a4e8..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" - -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount, - int *const outCodePoint, int *const outCodePointCount, int *const outProbability, - bool *const outhasNext, int *const shortcutEntryPos) const { - const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer(); - const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos); - if (outProbability) { - *outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK; - } - if (outhasNext) { - *outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK; - } - if (outCodePoint && outCodePointCount) { - shortcutListBuffer->readCodePointsAndAdvancePosition( - maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos); - } -} - -int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const { - const SparseTable *const addressLookupTable = getAddressLookupTable(); - if (!addressLookupTable->contains(terminalId)) { - return NOT_A_DICT_POS; - } - return addressLookupTable->get(terminalId); -} - -bool ShortcutDictContent::flushToFile(const char *const dictPath) const { - return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, - Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, - 
Ver4DictConstants::SHORTCUT_FILE_EXTENSION); -} - -bool ShortcutDictContent::runGC( - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ShortcutDictContent *const originalShortcutDictContent) { - for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); - it != terminalIdMap->end(); ++it) { - const int originalShortcutListPos = - originalShortcutDictContent->getShortcutListHeadPos(it->first); - if (originalShortcutListPos == NOT_A_DICT_POS) { - continue; - } - const int shortcutListPos = getContentBuffer()->getTailPosition(); - // Copy shortcut list from original content. - if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent, - shortcutListPos)) { - AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d", - originalShortcutListPos, shortcutListPos); - return false; - } - // Set shortcut list position to the lookup table. - if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) { - AKLOGE("Cannot set shortcut list position. 
terminal id: %d, pos: %d", - it->second, shortcutListPos); - return false; - } - } - return true; -} - -bool ShortcutDictContent::createNewShortcutList(const int terminalId) { - const int shortcutListListPos = getContentBuffer()->getTailPosition(); - return getUpdatableAddressLookupTable()->set(terminalId, shortcutListListPos); -} - -bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) { - return copyShortcutListFromDictContent(shortcutListPos, this, toPos); -} - -bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos, - const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) { - bool hasNext = true; - int readingPos = shortcutListPos; - int writingPos = toPos; - int codePoints[MAX_WORD_LENGTH]; - while (hasNext) { - int probability = 0; - int codePointCount = 0; - sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, - codePoints, &codePointCount, &probability, &hasNext, &readingPos); - if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability, - hasNext, &writingPos)) { - AKLOGE("Cannot write shortcut entry to copy. 
pos: %d", writingPos); - return false; - } - } - return true; -} - -bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) { - BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer(); - const int shortcutFlags = shortcutListBuffer->readUint( - Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos); - const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK; - const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext); - return shortcutListBuffer->writeUint(shortcutFlagsToWrite, - Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos); -} - -bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint, - const int codePointCount, const int probability, const bool hasNext, - int *const shortcutEntryPos) { - BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer(); - const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext); - if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags, - Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) { - AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos); - return false; - } - if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount, - true /* writesTerminator */, shortcutEntryPos)) { - AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos); - return false; - } - return true; -} - -// Find a shortcut entry that has specified target and return its position. 
-int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos, - const int *const targetCodePointsToFind, const int codePointCount) const { - bool hasNext = true; - int readingPos = shortcutListPos; - int targetCodePoints[MAX_WORD_LENGTH]; - while (hasNext) { - const int entryPos = readingPos; - int probability = 0; - int targetCodePointCount = 0; - getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount, - &probability, &hasNext, &readingPos); - if (targetCodePointCount != codePointCount) { - continue; - } - bool matched = true; - for (int i = 0; i < codePointCount; ++i) { - if (targetCodePointsToFind[i] != targetCodePoints[i]) { - matched = false; - break; - } - } - if (matched) { - return entryPos; - } - } - return NOT_A_DICT_POS; -} - -int ShortcutDictContent::createAndGetShortcutFlags(const int probability, - const bool hasNext) const { - return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK) - | (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h deleted file mode 100644 index eaafc27bc..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H -#define LATINIME_SHORTCUT_DICT_CONTENT_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" - -namespace latinime { - -class ShortcutDictContent : public SparseTableDictContent { - public: - ShortcutDictContent(const char *const dictPath, const bool isUpdatable) - : SparseTableDictContent(dictPath, - Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, - Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, - Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable, - Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} - - ShortcutDictContent() - : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} - - void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint, - int *const outCodePointCount, int *const outProbability, bool *const outhasNext, - const int shortcutEntryPos) { - int readingPos = shortcutEntryPos; - return getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint, - outCodePointCount, outProbability, outhasNext, &readingPos); - } - - void getShortcutEntryAndAdvancePosition(const int maxCodePointCount, - int *const outCodePoint, int *const outCodePointCount, int *const outProbability, - bool *const outhasNext, int *const shortcutEntryPos) const; - - // Returns head position of shortcut list for a PtNode specified by terminalId. 
- int getShortcutListHeadPos(const int terminalId) const; - - bool flushToFile(const char *const dictPath) const; - - bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ShortcutDictContent *const originalShortcutDictContent); - - bool createNewShortcutList(const int terminalId); - - bool copyShortcutList(const int shortcutListPos, const int toPos); - - bool setProbability(const int probability, const int shortcutEntryPos); - - bool writeShortcutEntry(const int *const codePoint, const int codePointCount, - const int probability, const bool hasNext, const int shortcutEntryPos) { - int writingPos = shortcutEntryPos; - return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability, - hasNext, &writingPos); - } - - bool writeShortcutEntryAndAdvancePosition(const int *const codePoint, - const int codePointCount, const int probability, const bool hasNext, - int *const shortcutEntryPos); - - int findShortcutEntryAndGetPos(const int shortcutListPos, - const int *const targetCodePointsToFind, const int codePointCount) const; - - private: - DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent); - - bool copyShortcutListFromDictContent(const int shortcutListPos, - const ShortcutDictContent *const sourceShortcutDictContent, const int toPos); - - int createAndGetShortcutFlags(const int probability, const bool hasNext) const; -}; -} // namespace latinime -#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h deleted file mode 100644 index 9064b7e72..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_SINGLE_DICT_CONTENT_H -#define LATINIME_SINGLE_DICT_CONTENT_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" -#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" - -namespace latinime { - -class SingleDictContent : public DictContent { - public: - SingleDictContent(const char *const dictPath, const char *const contentFileName, - const bool isUpdatable) - : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)), - mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0, - mMmappedBuffer.get() ? 
mMmappedBuffer.get()->getBufferSize() : 0, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mIsValid(mMmappedBuffer.get() != 0) {} - - SingleDictContent() - : mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), - mIsValid(true) {} - - virtual ~SingleDictContent() {} - - virtual bool isValid() const { - return mIsValid; - } - - bool isNearSizeLimit() const { - return mExpandableContentBuffer.isNearSizeLimit(); - } - - protected: - BufferWithExtendableBuffer *getWritableBuffer() { - return &mExpandableContentBuffer; - } - - const BufferWithExtendableBuffer *getBuffer() const { - return &mExpandableContentBuffer; - } - - bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const { - return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, - contentFileNameSuffix, &mExpandableContentBuffer); - } - - private: - DISALLOW_COPY_AND_ASSIGN(SingleDictContent); - - const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; - BufferWithExtendableBuffer mExpandableContentBuffer; - const bool mIsValid; -}; -} // namespace latinime -#endif /* LATINIME_SINGLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp deleted file mode 100644 index 63c6ea3a4..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" - -namespace latinime { - -bool SparseTableDictContent::flush(const char *const dictPath, - const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix, - const char *const contentFileNameSuffix) const { - if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix, - &mExpandableLookupTableBuffer)){ - return false; - } - if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix, - &mExpandableAddressTableBuffer)) { - return false; - } - if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix, - &mExpandableContentBuffer)) { - return false; - } - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h deleted file mode 100644 index a82e3f50a..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H -#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" -#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" -#include "suggest/policyimpl/dictionary/utils/sparse_table.h" - -namespace latinime { - -// TODO: Support multiple contents. -class SparseTableDictContent : public DictContent { - public: - AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath, - const char *const lookupTableFileName, const char *const addressTableFileName, - const char *const contentFileName, const bool isUpdatable, - const int sparseTableBlockSize, const int sparseTableDataSize) - : mLookupTableBuffer( - MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)), - mAddressTableBuffer( - MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)), - mContentBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)), - mExpandableLookupTableBuffer( - mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBuffer() : 0, - mLookupTableBuffer.get() ? mLookupTableBuffer.get()->getBufferSize() : 0, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableAddressTableBuffer( - mAddressTableBuffer.get() ? 
mAddressTableBuffer.get()->getBuffer() : 0, - mAddressTableBuffer.get() ? mAddressTableBuffer.get()->getBufferSize() : 0, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0, - mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, - sparseTableBlockSize, sparseTableDataSize), - mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 - && mContentBuffer.get() != 0) {} - - SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize) - : mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0), - mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), - mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), - mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), - mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, - sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {} - - virtual ~SparseTableDictContent() {} - - virtual bool isValid() const { - return mIsValid; - } - - bool isNearSizeLimit() const { - return mExpandableLookupTableBuffer.isNearSizeLimit() - || mExpandableAddressTableBuffer.isNearSizeLimit() - || mExpandableContentBuffer.isNearSizeLimit(); - } - - protected: - SparseTable *getUpdatableAddressLookupTable() { - return &mAddressLookupTable; - } - - const SparseTable *getAddressLookupTable() const { - return &mAddressLookupTable; - } - - BufferWithExtendableBuffer *getWritableContentBuffer() { - return &mExpandableContentBuffer; - } - - const BufferWithExtendableBuffer *getContentBuffer() const { - return &mExpandableContentBuffer; - } - - bool flush(const char *const dictDirPath, const char *const lookupTableFileName, - const char *const addressTableFileName, 
const char *const contentFileName) const; - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); - - const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer; - const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer; - const MmappedBuffer::MmappedBufferPtr mContentBuffer; - BufferWithExtendableBuffer mExpandableLookupTableBuffer; - BufferWithExtendableBuffer mExpandableAddressTableBuffer; - BufferWithExtendableBuffer mExpandableContentBuffer; - SparseTable mAddressLookupTable; - const bool mIsValid; -}; -} // namespace latinime -#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp deleted file mode 100644 index 0b17a009d..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const { - if (terminalId < 0 || terminalId >= mSize) { - return NOT_A_DICT_POS; - } - const int terminalPos = getBuffer()->readUint( - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId)); - return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ? - NOT_A_DICT_POS : terminalPos; -} - -bool TerminalPositionLookupTable::setTerminalPtNodePosition( - const int terminalId, const int terminalPtNodePos) { - if (terminalId < 0) { - return NOT_A_DICT_POS; - } - while (terminalId >= mSize) { - // Write new entry. - if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS, - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) { - return false; - } - mSize++; - } - const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ? - terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS; - return getWritableBuffer()->writeUint(terminalPos, - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId)); -} - -bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const { - // If the used buffer size is smaller than the actual buffer size, regenerate the lookup - // table and write the new table to the file. - if (getEntryPos(mSize) < getBuffer()->getTailPosition()) { - TerminalPositionLookupTable lookupTableToWrite; - for (int i = 0; i < mSize; ++i) { - const int terminalPtNodePosition = getTerminalPtNodePosition(i); - if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) { - AKLOGE("Cannot set terminal position to lookupTableToWrite." 
- " terminalId: %d, position: %d", i, terminalPtNodePosition); - return false; - } - } - return lookupTableToWrite.flush(dictPath, - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); - } else { - // We can simply use this lookup table because the buffer size has not been - // changed. - return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); - } -} - -bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) { - int removedEntryCount = 0; - int nextNewTerminalId = 0; - for (int i = 0; i < mSize; ++i) { - const int terminalPos = getBuffer()->readUint( - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i)); - if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) { - // This entry is a garbage. - removedEntryCount++; - } else { - // Give a new terminal id to the entry. - if (!getWritableBuffer()->writeUint(terminalPos, - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, - getEntryPos(nextNewTerminalId))) { - return false; - } - // Memorize the mapping to the old terminal id to the new terminal id. - terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId)); - nextNewTerminalId++; - } - } - mSize = nextNewTerminalId; - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h deleted file mode 100644 index f73e22754..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H -#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "utils/hash_map_compat.h" - -namespace latinime { - -class TerminalPositionLookupTable : public SingleDictContent { - public: - typedef hash_map_compat<int, int> TerminalIdMap; - - TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable) - : SingleDictContent(dictPath, - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable), - mSize(getBuffer()->getTailPosition() - / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {} - - TerminalPositionLookupTable() : mSize(0) {} - - int getTerminalPtNodePosition(const int terminalId) const; - - bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos); - - int getNextTerminalId() const { - return mSize; - } - - bool flushToFile(const char *const dictPath) const; - - bool runGCTerminalIds(TerminalIdMap *const terminalIdMap); - - private: - DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable); - - int getEntryPos(const int terminalId) const { - return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; - } - - int mSize; -}; -} // namespace latinime -#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp 
b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp deleted file mode 100644 index 918c02ba2..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" - -#include <cerrno> -#include <cstring> -#include <sys/stat.h> -#include <sys/types.h> - -#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" -#include "suggest/policyimpl/dictionary/utils/file_utils.h" - -namespace latinime { - -/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers( - const char *const dictPath, const MmappedBuffer::MmappedBufferPtr &headerBuffer) { - const bool isUpdatable = headerBuffer.get() ? headerBuffer.get()->isUpdatable() : false; - // TODO: take only dictDirPath, and open both header and trie files in the constructor below - return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, headerBuffer, isUpdatable)); -} - -bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, - const BufferWithExtendableBuffer *const headerBuffer) const { - // Create temporary directory. 
- const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath, - DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); - char tmpDirPath[tmpDirPathBufSize]; - FileUtils::getFilePathWithSuffix(dictDirPath, - DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize, - tmpDirPath); - if (FileUtils::existsDir(tmpDirPath)) { - if (!FileUtils::removeDirAndFiles(tmpDirPath)) { - AKLOGE("Existing directory %s cannot be removed.", tmpDirPath); - ASSERT(false); - return false; - } - } - if (mkdir(tmpDirPath, S_IRWXU) == -1) { - AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); - return false; - } - // Get dictionary base path. - const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */; - char dictName[dictNameBufSize]; - FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName); - const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName); - char dictPath[dictPathBufSize]; - FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath); - - // Write header file. - if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, - Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) { - AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath, - Ver4DictConstants::HEADER_FILE_EXTENSION); - return false; - } - // Write trie file. - if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, - Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) { - AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath, - Ver4DictConstants::TRIE_FILE_EXTENSION); - return false; - } - // Write dictionary contents. - if (!mTerminalPositionLookupTable.flushToFile(dictPath)) { - AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath); - return false; - } - if (!mProbabilityDictContent.flushToFile(dictPath)) { - AKLOGE("Probability dict content cannot be written. 
%s", tmpDirPath); - return false; - } - if (!mBigramDictContent.flushToFile(dictPath)) { - AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath); - return false; - } - if (!mShortcutDictContent.flushToFile(dictPath)) { - AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath); - return false; - } - // Remove existing dictionary. - if (!FileUtils::removeDirAndFiles(dictDirPath)) { - AKLOGE("Existing directory %s cannot be removed.", dictDirPath); - ASSERT(false); - return false; - } - // Rename temporary directory. - if (rename(tmpDirPath, dictDirPath) != 0) { - AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath); - ASSERT(false); - return false; - } - return true; -} - -Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath, - const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable) - : mHeaderBuffer(headerBuffer), - mDictBuffer(MmappedBuffer::openBuffer(dictPath, - Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)), - mHeaderPolicy(headerBuffer.get()->getBuffer(), FormatUtils::VERSION_4), - mExpandableHeaderBuffer(headerBuffer.get()->getBuffer(), mHeaderPolicy.getSize(), - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableTrieBuffer(mDictBuffer.get()->getBuffer(), - mDictBuffer.get()->getBufferSize(), - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mTerminalPositionLookupTable(dictPath, isUpdatable), - mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), - isUpdatable), - mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), - isUpdatable), - mShortcutDictContent(dictPath, isUpdatable), - mIsUpdatable(isUpdatable) {} - -Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy) - : mHeaderBuffer(0), mDictBuffer(0), mHeaderPolicy(), - mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), - mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), - mTerminalPositionLookupTable(), - 
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()), - mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), - mIsUpdatable(true) {} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h deleted file mode 100644 index a0c219e4d..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_VER4_DICT_BUFFER_H -#define LATINIME_VER4_DICT_BUFFER_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" - -namespace latinime { - -class Ver4DictBuffers { - public: - typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr; - - static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, - const MmappedBuffer::MmappedBufferPtr &headerBuffer); - - static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers( - const HeaderPolicy *const headerPolicy) { - return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy)); - } - - AK_FORCE_INLINE bool isValid() const { - return mDictBuffer.get() != 0 && mHeaderPolicy.isValid() - && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid() - && mBigramDictContent.isValid() && mShortcutDictContent.isValid(); - } - - AK_FORCE_INLINE bool isNearSizeLimit() const { - return mExpandableTrieBuffer.isNearSizeLimit() - || mTerminalPositionLookupTable.isNearSizeLimit() - || mProbabilityDictContent.isNearSizeLimit() - || mBigramDictContent.isNearSizeLimit() - || mShortcutDictContent.isNearSizeLimit(); - } - - AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const { - return &mHeaderPolicy; - } - - AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() { - return &mExpandableHeaderBuffer; - } - - AK_FORCE_INLINE 
BufferWithExtendableBuffer *getWritableTrieBuffer() { - return &mExpandableTrieBuffer; - } - - AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const { - return &mExpandableTrieBuffer; - } - - AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() { - return &mTerminalPositionLookupTable; - } - - AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const { - return &mTerminalPositionLookupTable; - } - - AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() { - return &mProbabilityDictContent; - } - - AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { - return &mProbabilityDictContent; - } - - AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() { - return &mBigramDictContent; - } - - AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { - return &mBigramDictContent; - } - - AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() { - return &mShortcutDictContent; - } - - AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const { - return &mShortcutDictContent; - } - - AK_FORCE_INLINE bool isUpdatable() const { - return mIsUpdatable; - } - - bool flush(const char *const dictDirPath) const { - return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer); - } - - bool flushHeaderAndDictBuffers(const char *const dictDirPath, - const BufferWithExtendableBuffer *const headerBuffer) const; - - private: - DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); - - Ver4DictBuffers(const char *const dictDirPath, - const MmappedBuffer::MmappedBufferPtr &headerBuffer, const bool isUpdatable); - - Ver4DictBuffers(const HeaderPolicy *const headerPolicy); - - const MmappedBuffer::MmappedBufferPtr mHeaderBuffer; - const MmappedBuffer::MmappedBufferPtr mDictBuffer; - const HeaderPolicy mHeaderPolicy; - BufferWithExtendableBuffer mExpandableHeaderBuffer; - BufferWithExtendableBuffer 
mExpandableTrieBuffer; - TerminalPositionLookupTable mTerminalPositionLookupTable; - ProbabilityDictContent mProbabilityDictContent; - BigramDictContent mBigramDictContent; - ShortcutDictContent mShortcutDictContent; - const int mIsUpdatable; -}; -} // namespace latinime -#endif /* LATINIME_VER4_DICT_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp deleted file mode 100644 index 34fecc25f..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" - -namespace latinime { - -// These values MUST match the definitions in FormatSpec.java. 
-const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie"; -const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header"; -const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq"; -// tat = Terminal Address Table -const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; -const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq"; -const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup"; -const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq"; -const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut"; -const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup"; -const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = - ".shortcut_index_shortcut"; - -// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets. -// TODO: Make MAX_DICTIONARY_SIZE 8MB. -const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; -// Extended region size, which is not GCed region size in dict file + additional buffer size, is -// limited to 1MB to prevent from inefficient traversing. 
-const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; - -const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; -const int Ver4DictConstants::PROBABILITY_SIZE = 1; -const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; -const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; -const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0; -const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; -const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4; -const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1; -const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1; - -const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; -const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4; -const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; -const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; - -const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; -// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing -// invalid terminal ID in bigram lists. 
-const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID = - (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; -const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; -const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; -const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; -const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1; - -const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; -const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F; -const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h deleted file mode 100644 index d6d22c5c1..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_VER4_DICT_CONSTANTS_H -#define LATINIME_VER4_DICT_CONSTANTS_H - -#include "defines.h" - -namespace latinime { - -// TODO: Create PtConstants under the pt_common and move some constant values there. -// Note that there are corresponding definitions in FormatSpec.java. 
-class Ver4DictConstants { - public: - static const char *const TRIE_FILE_EXTENSION; - static const char *const HEADER_FILE_EXTENSION; - static const char *const FREQ_FILE_EXTENSION; - static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION; - static const char *const BIGRAM_FILE_EXTENSION; - static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION; - static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION; - static const char *const SHORTCUT_FILE_EXTENSION; - static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; - static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; - - static const int MAX_DICTIONARY_SIZE; - static const int MAX_DICT_EXTENDED_REGION_SIZE; - - static const int NOT_A_TERMINAL_ID; - static const int PROBABILITY_SIZE; - static const int FLAGS_IN_PROBABILITY_FILE_SIZE; - static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; - static const int NOT_A_TERMINAL_ADDRESS; - static const int TERMINAL_ID_FIELD_SIZE; - static const int TIME_STAMP_FIELD_SIZE; - static const int WORD_LEVEL_FIELD_SIZE; - static const int WORD_COUNT_FIELD_SIZE; - - static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE; - static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE; - static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE; - static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE; - - static const int BIGRAM_FLAGS_FIELD_SIZE; - static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; - static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; - static const int BIGRAM_PROBABILITY_MASK; - static const int BIGRAM_HAS_NEXT_MASK; - // Used when bigram list has time stamp. 
- static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE; - - static const int SHORTCUT_FLAGS_FIELD_SIZE; - static const int SHORTCUT_PROBABILITY_MASK; - static const int SHORTCUT_HAS_NEXT_MASK; - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); -}; -} // namespace latinime -#endif /* LATINIME_VER4_DICT_CONSTANTS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp deleted file mode 100644 index 17fc9483b..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" - -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" - -namespace latinime { - -const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( - const int ptNodePos, const int siblingNodePos) const { - if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { - // Reading invalid position because of bug or broken dictionary. - AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", - ptNodePos, mBuffer->getTailPosition()); - ASSERT(false); - return PtNodeParams(); - } - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - int pos = ptNodePos; - const int headPos = ptNodePos; - if (usesAdditionalBuffer) { - pos -= mBuffer->getOriginalBufferSize(); - } - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const int parentPosOffset = - DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( - dictBuf, &pos); - const int parentPos = - DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos); - int codePoints[MAX_WORD_LENGTH]; - const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); - int terminalIdFieldPos = 
NOT_A_DICT_POS; - int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - int probability = NOT_A_PROBABILITY; - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - terminalIdFieldPos = pos; - if (usesAdditionalBuffer) { - terminalIdFieldPos += mBuffer->getOriginalBufferSize(); - } - terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); - const ProbabilityEntry probabilityEntry = - mProbabilityDictContent->getProbabilityEntry(terminalId); - if (probabilityEntry.hasHistoricalInfo()) { - probability = ForgettingCurveUtils::decodeProbability( - probabilityEntry.getHistoricalInfo()); - } else { - probability = probabilityEntry.getProbability(); - } - } - int childrenPosFieldPos = pos; - if (usesAdditionalBuffer) { - childrenPosFieldPos += mBuffer->getOriginalBufferSize(); - } - int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition( - dictBuf, &pos); - if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) { - childrenPos += mBuffer->getOriginalBufferSize(); - } - if (usesAdditionalBuffer) { - pos += mBuffer->getOriginalBufferSize(); - } - // Sibling position is the tail position of original PtNode. - int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos; - // Read destination node if the read node is a moved node. - if (DynamicPtReadingUtils::isMoved(flags)) { - // The destination position is stored at the same place as the parent position. 
- return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos); - } else { - return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, - terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos, - newSiblingNodePos); - } -} - -} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h deleted file mode 100644 index 9d932457c..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H -#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" - -namespace latinime { - -class BufferWithExtendableBuffer; -class ProbabilityDictContent; - -/* - * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved - * node and reads node attributes including probability form probabilityBuffer. 
- */ -class Ver4PatriciaTrieNodeReader : public PtNodeReader { - public: - Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, - const ProbabilityDictContent *const probabilityDictContent) - : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {} - - ~Ver4PatriciaTrieNodeReader() {} - - virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const { - return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, - NOT_A_DICT_POS /* siblingNodePos */); - } - - private: - DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); - - const BufferWithExtendableBuffer *const mBuffer; - const ProbabilityDictContent *const mProbabilityDictContent; - - const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, - const int siblingNodePos) const; -}; -} // namespace latinime -#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp deleted file mode 100644 index 32576cf0a..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" - -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" - -namespace latinime { - -const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; - -bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted( - const PtNodeParams *const toBeUpdatedPtNodeParams) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mTrieBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, - true /* isDeleted */, false /* willBecomeNonTerminal */); - int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - // Update flags. 
- if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, - &writingPos)) { - return false; - } - if (toBeUpdatedPtNodeParams->isTerminal()) { - // The PtNode is a terminal. Delete entry from the terminal position lookup table. - return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition( - toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */); - } else { - return true; - } -} - -bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( - const PtNodeParams *const toBeUpdatedPtNodeParams, - const int movedPos, const int bigramLinkedNodePos) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mTrieBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, - false /* isDeleted */, false /* willBecomeNonTerminal */); - int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - // Update flags. - if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, - &writingPos)) { - return false; - } - // Update moved position, which is stored in the parent offset field. - if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition( - mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { - return false; - } - if (toBeUpdatedPtNodeParams->hasChildren()) { - // Update children's parent position. 
- mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); - while (!mReadingHelper.isEnd()) { - const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams()); - int parentOffsetFieldPos = childPtNodeParams.getHeadPos() - + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE; - if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition( - mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), - &parentOffsetFieldPos)) { - // Parent offset cannot be written because of a bug or a broken dictionary; thus, - // we give up to update dictionary. - return false; - } - mReadingHelper.readNextSiblingNode(childPtNodeParams); - } - } - return true; -} - -bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal( - const PtNodeParams *const toBeUpdatedPtNodeParams) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mTrieBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, - false /* isDeleted */, true /* willBecomeNonTerminal */); - if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition( - toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) { - AKLOGE("Cannot update terminal position lookup table. terminal id: %d", - toBeUpdatedPtNodeParams->getTerminalId()); - return false; - } - // Update flags. 
- int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, - &writingPos); -} - -bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( - const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, - const int timestamp) { - if (!toBeUpdatedPtNodeParams->isTerminal()) { - return false; - } - const ProbabilityEntry originalProbabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( - toBeUpdatedPtNodeParams->getTerminalId()); - const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, - newProbability, timestamp); - return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( - toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); -} - -bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( - const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) { - if (!toBeUpdatedPtNodeParams->isTerminal()) { - AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode."); - return false; - } - const ProbabilityEntry originalProbabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( - toBeUpdatedPtNodeParams->getTerminalId()); - if (originalProbabilityEntry.hasHistoricalInfo()) { - const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( - originalProbabilityEntry.getHistoricalInfo()); - const ProbabilityEntry probabilityEntry = - originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo); - if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( - toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { - AKLOGE("Cannot write updated probability entry. 
terminalId: %d", - toBeUpdatedPtNodeParams->getTerminalId()); - return false; - } - const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo); - if (!isValid) { - if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { - AKLOGE("Cannot mark PtNode as willBecomeNonTerminal."); - return false; - } - } - *outNeedsToKeepPtNode = isValid; - } else { - // No need to update probability. - *outNeedsToKeepPtNode = true; - } - return true; -} - -bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition( - const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) { - int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); - return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, - newChildrenPosition, &childrenPosFieldPos); -} - -bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int newTerminalId) { - return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, - toBeUpdatedPtNodeParams->getTerminalIdFieldPos()); -} - -bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( - const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { - return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */, - ptNodeWritingPos); -} - - -bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( - const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) { - int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, - ptNodeWritingPos)) { - return false; - } - // Write probability. 
- ProbabilityEntry newProbabilityEntry; - const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( - &newProbabilityEntry, ptNodeParams->getProbability(), timestamp); - return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, - &probabilityEntryToWrite); -} - -bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( - const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, - bool *const outAddedNewBigram) { - if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) { - AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", - sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); - return false; - } - if (!sourcePtNodeParams->hasBigrams()) { - // Update has bigrams flag. - return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(), - sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(), - sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(), - true /* hasBigrams */, - sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); - } - return true; -} - -bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( - const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { - return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId()); -} - -bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( - const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) { - return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries( - sourcePtNodeParams->getTerminalId(), outBigramEntryCount); -} - -bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields( - const PtNodeParams *const toBeUpdatedPtNodeParams, - const DictPositionRelocationMap *const 
dictPositionRelocationMap, - int *const outBigramEntryCount) { - int parentPos = toBeUpdatedPtNodeParams->getParentPos(); - if (parentPos != NOT_A_DICT_POS) { - PtNodeWriter::PtNodePositionRelocationMap::const_iterator it = - dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); - if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) { - parentPos = it->second; - } - } - int writingPos = toBeUpdatedPtNodeParams->getHeadPos() - + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE; - // Write updated parent offset. - if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, - parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { - return false; - } - - // Updates children position. - int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos(); - if (childrenPos != NOT_A_DICT_POS) { - PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it = - dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); - if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) { - childrenPos = it->second; - } - } - if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) { - return false; - } - - // Counts bigram entries. - if (outBigramEntryCount) { - *outBigramEntryCount = mBigramPolicy->getBigramEntryConut( - toBeUpdatedPtNodeParams->getTerminalId()); - } - return true; -} - -bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams, - const int *const targetCodePoints, const int targetCodePointCount, - const int shortcutProbability) { - if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(), - targetCodePoints, targetCodePointCount, shortcutProbability)) { - AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId()); - return false; - } - if (!ptNodeParams->hasShortcutTargets()) { - // Update has shortcut targets flag. 
- return updatePtNodeFlags(ptNodeParams->getHeadPos(), - ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), - ptNodeParams->isTerminal(), true /* hasShortcutTargets */, - ptNodeParams->hasBigrams(), - ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); - } - return true; -} - -bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags( - const PtNodeParams *const ptNodeParams) { - const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos( - ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; - const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos( - ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; - return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(), - ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets, - hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); -} - -bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( - const PtNodeParams *const ptNodeParams, int *const outTerminalId, - int *const ptNodeWritingPos) { - const int nodePos = *ptNodeWritingPos; - // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the - // PtNode writing. - if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, - 0 /* nodeFlags */, ptNodeWritingPos)) { - return false; - } - // Calculate a parent offset and write the offset. 
- if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, - ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { - return false; - } - // Write code points - if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer, - ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { - return false; - } - int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - if (!ptNodeParams->willBecomeNonTerminal()) { - if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) { - terminalId = ptNodeParams->getTerminalId(); - } else if (ptNodeParams->isTerminal()) { - // Write terminal information using a new terminal id. - // Get a new unused terminal id. - terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId(); - } - } - const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; - if (isTerminal) { - // Update the lookup table. - if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition( - terminalId, nodePos)) { - return false; - } - // Write terminal Id. - if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId, - Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) { - return false; - } - if (outTerminalId) { - *outTerminalId = terminalId; - } - } - // Write children position - if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, - ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { - return false; - } - return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), - isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), - ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); -} - -const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( - const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, - const int timestamp) const { - // TODO: Consolidate historical info and probability. 
- if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) { - const HistoricalInfo updatedHistoricalInfo = - ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp); - return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( - &updatedHistoricalInfo); - } else { - return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); - } -} - -bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, - const bool isBlacklisted, const bool isNotAWord, const bool isTerminal, - const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) { - // Create node flags and write them. - PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal, - hasShortcutTargets, hasBigrams, hasMultipleChars, - CHILDREN_POSITION_FIELD_SIZE); - if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) { - AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos); - return false; - } - return true; -} - -} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h deleted file mode 100644 index 69576d8e5..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H -#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" - -namespace latinime { - -class BufferWithExtendableBuffer; -class Ver4BigramListPolicy; -class Ver4DictBuffers; -class Ver4ShortcutListPolicy; - -/* - * This class is used for helping to writes nodes of ver4 patricia trie. 
- */ -class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { - public: - Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, - Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, - Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) - : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), - mReadingHelper(mTrieBuffer, mPtNodeReader), - mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} - - virtual ~Ver4PatriciaTrieNodeWriter() {} - - virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); - - virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int movedPos, const int bigramLinkedNodePos); - - virtual bool markPtNodeAsWillBecomeNonTerminal( - const PtNodeParams *const toBeUpdatedPtNodeParams); - - virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int newProbability, const int timestamp); - - virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( - const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode); - - virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int newChildrenPosition); - - bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int newTerminalId); - - virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, - int *const ptNodeWritingPos); - - virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, - const int timestamp, int *const ptNodeWritingPos); - - virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, - bool *const outAddedNewBigram); - - virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const 
PtNodeParams *const targetPtNodeParam); - - virtual bool updateAllBigramEntriesAndDeleteUselessEntries( - const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount); - - virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams, - const DictPositionRelocationMap *const dictPositionRelocationMap, - int *const outBigramEntryCount); - - virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams, - const int *const targetCodePoints, const int targetCodePointCount, - const int shortcutProbability); - - bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams); - - private: - DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); - - bool writePtNodeAndGetTerminalIdAndAdvancePosition( - const PtNodeParams *const ptNodeParams, int *const outTerminalId, - int *const ptNodeWritingPos); - - // Create updated probability entry using given probability and timestamp. In addition to the - // probability, this method updates historical information if needed. 
- const ProbabilityEntry createUpdatedEntryFrom( - const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, - const int timestamp) const; - - bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, - const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, - const bool hasMultipleChars); - - static const int CHILDREN_POSITION_FIELD_SIZE; - - BufferWithExtendableBuffer *const mTrieBuffer; - Ver4DictBuffers *const mBuffers; - const Ver4PatriciaTrieNodeReader *const mPtNodeReader; - DynamicPtReadingHelper mReadingHelper; - Ver4BigramListPolicy *const mBigramPolicy; - Ver4ShortcutListPolicy *const mShortcutPolicy; -}; -} // namespace latinime -#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp deleted file mode 100644 index 96bb8128e..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" - -#include <vector> - -#include "suggest/core/dicnode/dic_node.h" -#include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/unigram_property.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" -#include "suggest/policyimpl/dictionary/utils/probability_utils.h" - -namespace latinime { - -// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and -// BinaryDictionaryDecayingTests. -const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; -const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; -const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; -const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; -const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT = - "SET_CURRENT_TIME_FOR_TESTING:%d"; -const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME"; -const char *const Ver4PatriciaTriePolicy::QUIT_TIMEKEEPER_TEST_MODE_QUERY = - "QUIT_TIMEKEEPER_TEST_MODE"; -const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; -const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = - Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; - -void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, - DicNodeVector *const childDicNodes) const { - if (!dicNode->hasChildren()) { - return; - } - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); - readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); - while (!readingHelper.isEnd()) { - 
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); - if (!ptNodeParams.isValid()) { - break; - } - bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); - if (isTerminal && mHeaderPolicy->isDecayingDict()) { - // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose - // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a - // valid terminal DicNode. - isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; - } - childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), - ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, - ptNodeParams.hasChildren(), - ptNodeParams.isBlacklisted() - || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, - ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); - readingHelper.readNextSiblingNode(ptNodeParams); - } -} - -int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, - int *const outUnigramProbability) const { - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); - readingHelper.initWithPtNodePos(ptNodePos); - return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( - maxCodePointCount, outCodePoints, outUnigramProbability); -} - -int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, - const int length, const bool forceLowerCaseSearch) const { - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); - readingHelper.initWithPtNodeArrayPos(getRootPosition()); - return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); -} - -int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, - const int bigramProbability) const { - if (mHeaderPolicy->isDecayingDict()) { - // Both probabilities are encoded. Decode them and get probability. 
- return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); - } else { - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return ProbabilityUtils::backoff(unigramProbability); - } else { - // bigramProbability is a bigram probability delta. - return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, - bigramProbability); - } - } -} - -int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { - if (ptNodePos == NOT_A_DICT_POS) { - return NOT_A_PROBABILITY; - } - const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); - if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { - return NOT_A_PROBABILITY; - } - return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); -} - -int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { - if (ptNodePos == NOT_A_DICT_POS) { - return NOT_A_DICT_POS; - } - const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); - if (ptNodeParams.isDeleted()) { - return NOT_A_DICT_POS; - } - return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos( - ptNodeParams.getTerminalId()); -} - -int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { - if (ptNodePos == NOT_A_DICT_POS) { - return NOT_A_DICT_POS; - } - const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); - if (ptNodeParams.isDeleted()) { - return NOT_A_DICT_POS; - } - return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos( - ptNodeParams.getTerminalId()); -} - -bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, - const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength, - const int shortcutProbability, const bool isNotAWord, const 
bool isBlacklisted, - const int timestamp) { - if (!mBuffers.get()->isUpdatable()) { - AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); - return false; - } - if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { - AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", - mDictBuffer->getTailPosition()); - return false; - } - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); - readingHelper.initWithPtNodeArrayPos(getRootPosition()); - bool addedNewUnigram = false; - if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord, - isBlacklisted, timestamp, &addedNewUnigram)) { - if (addedNewUnigram) { - mUnigramCount++; - } - if (shortcutLength > 0) { - // Add shortcut target. - const int wordPos = getTerminalPtNodePositionOfWord(word, length, - false /* forceLowerCaseSearch */); - if (wordPos == NOT_A_DICT_POS) { - AKLOGE("Cannot find terminal PtNode position to add shortcut target."); - return false; - } - if (!mUpdatingHelper.addShortcutTarget(wordPos, shortcutTargetCodePoints, - shortcutLength, shortcutProbability)) { - AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, probability: %d", - wordPos, shortcutLength, shortcutProbability); - return false; - } - } - return true; - } else { - return false; - } -} - -bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1, const int probability, - const int timestamp) { - if (!mBuffers.get()->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); - return false; - } - if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { - AKLOGE("The dictionary is too large to dynamically update. 
Dictionary size: %d", - mDictBuffer->getTailPosition()); - return false; - } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { - return false; - } - const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, - false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_DICT_POS) { - return false; - } - bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp, - &addedNewBigram)) { - if (addedNewBigram) { - mBigramCount++; - } - return true; - } else { - return false; - } -} - -bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) { - if (!mBuffers.get()->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); - return false; - } - if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { - AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", - mDictBuffer->getTailPosition()); - return false; - } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { - return false; - } - const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, - false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_DICT_POS) { - return false; - } - if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { - mBigramCount--; - return true; - } else { - return false; - } -} - -void Ver4PatriciaTriePolicy::flush(const char *const filePath) { - if (!mBuffers.get()->isUpdatable()) { - AKLOGI("Warning: flush() is called for non-updatable dictionary. 
filePath: %s", filePath); - return; - } - mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount); -} - -void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { - if (!mBuffers.get()->isUpdatable()) { - AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); - return; - } - mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath); -} - -bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { - if (!mBuffers.get()->isUpdatable()) { - AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); - return false; - } - if (mBuffers.get()->isNearSizeLimit()) { - // Additional buffer size is near the limit. - return true; - } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize() - > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) { - // Total extended region size of the trie exceeds the limit. - return true; - } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS - && mDictBuffer->getUsedAdditionalBufferSize() > 0) { - // Needs to reduce dictionary size. - return true; - } else if (mHeaderPolicy->isDecayingDict()) { - return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, - mHeaderPolicy); - } - return false; -} - -void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength, - char *const outResult, const int maxResultLength) { - const int compareLength = queryLength + 1 /* terminator */; - int timestamp = NOT_A_TIMESTAMP; - if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { - snprintf(outResult, maxResultLength, "%d", mUnigramCount); - } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { - snprintf(outResult, maxResultLength, "%d", mBigramCount); - } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { - snprintf(outResult, maxResultLength, "%d", - mHeaderPolicy->isDecayingDict() ? 
ForgettingCurveUtils::MAX_UNIGRAM_COUNT : - static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); - } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { - snprintf(outResult, maxResultLength, "%d", - mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT : - static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); - } else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) { - TimeKeeper::startTestModeWithForceCurrentTime(timestamp); - } else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) { - snprintf(outResult, maxResultLength, "%d", TimeKeeper::peekCurrentTime()); - } else if (strncmp(query, QUIT_TIMEKEEPER_TEST_MODE_QUERY, compareLength) == 0) { - TimeKeeper::stopTestMode(); - } -} - -const UnigramProperty Ver4PatriciaTriePolicy::getUnigramProperty(const int *const codePoints, - const int codePointCount) const { - const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, - false /* forceLowerCaseSearch */); - if (ptNodePos == NOT_A_DICT_POS) { - AKLOGE("fetchUnigramProperty is called for invalid word."); - return UnigramProperty(); - } - const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - const ProbabilityEntry probabilityEntry = - mBuffers.get()->getProbabilityDictContent()->getProbabilityEntry( - ptNodeParams.getTerminalId()); - const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); - // Fetch shortcut information. 
- std::vector<std::vector<int> > shortcutTargets; - std::vector<int> shortcutProbabilities; - int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); - if (shortcutPos != NOT_A_DICT_POS) { - int shortcutTarget[MAX_WORD_LENGTH]; - const ShortcutDictContent *const shortcutDictContent = - mBuffers.get()->getShortcutDictContent(); - bool hasNext = true; - while (hasNext) { - int shortcutTargetLength = 0; - int shortcutProbability = NOT_A_PROBABILITY; - shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget, - &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos); - std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength); - shortcutTargets.push_back(target); - shortcutProbabilities.push_back(shortcutProbability); - } - } - return UnigramProperty(ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(), - ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), - ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), - historicalInfo->getTimeStamp(), historicalInfo->getLevel(), - historicalInfo->getCount(), &shortcutTargets, &shortcutProbabilities); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h deleted file mode 100644 index 8187b7a39..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H -#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H - -#include "defines.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -class DicNode; -class DicNodeVector; - -// TODO: Implement. 
-class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { - public: - Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) - : mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()), - mDictBuffer(mBuffers.get()->getWritableTrieBuffer()), - mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(), - mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy), - mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(), - mBuffers.get()->getTerminalPositionLookupTable()), - mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), - mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, - &mShortcutPolicy), - mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), - mWritingHelper(mBuffers.get()), - mUnigramCount(mHeaderPolicy->getUnigramCount()), - mBigramCount(mHeaderPolicy->getBigramCount()) {}; - - AK_FORCE_INLINE int getRootPosition() const { - return 0; - } - - void createAndGetAllChildDicNodes(const DicNode *const dicNode, - DicNodeVector *const childDicNodes) const; - - int getCodePointsAndProbabilityAndReturnCodePointCount( - const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, - int *const outUnigramProbability) const; - - int getTerminalPtNodePositionOfWord(const int *const inWord, - const int length, const bool forceLowerCaseSearch) const; - - int getProbability(const int unigramProbability, const int bigramProbability) const; - - int getUnigramProbabilityOfPtNode(const int ptNodePos) const; - - int getShortcutPositionOfPtNode(const int ptNodePos) const; - - int getBigramsPositionOfPtNode(const int ptNodePos) const; - - const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { - return mHeaderPolicy; - } - - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramPolicy; - } - - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { - return 
&mShortcutPolicy; - } - - bool addUnigramWord(const int *const word, const int length, const int probability, - const int *const shortcutTargetCodePoints, const int shortcutLength, - const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted, - const int timestamp); - - bool addBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1, const int probability, const int timestamp); - - bool removeBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1); - - void flush(const char *const filePath); - - void flushWithGC(const char *const filePath); - - bool needsToRunGC(const bool mindsBlockByGC) const; - - void getProperty(const char *const query, const int queryLength, char *const outResult, - const int maxResultLength); - - const UnigramProperty getUnigramProperty(const int *const codePoints, - const int codePointCount) const; - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); - - static const char *const UNIGRAM_COUNT_QUERY; - static const char *const BIGRAM_COUNT_QUERY; - static const char *const MAX_UNIGRAM_COUNT_QUERY; - static const char *const MAX_BIGRAM_COUNT_QUERY; - static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT; - static const char *const GET_CURRENT_TIME_QUERY; - static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY; - // When the dictionary size is near the maximum size, we have to refuse dynamic operations to - // prevent the dictionary from overflowing. 
- static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; - static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; - - Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; - const HeaderPolicy *const mHeaderPolicy; - BufferWithExtendableBuffer *const mDictBuffer; - Ver4BigramListPolicy mBigramPolicy; - Ver4ShortcutListPolicy mShortcutPolicy; - Ver4PatriciaTrieNodeReader mNodeReader; - Ver4PatriciaTrieNodeWriter mNodeWriter; - DynamicPtUpdatingHelper mUpdatingHelper; - Ver4PatriciaTrieWritingHelper mWritingHelper; - int mUnigramCount; - int mBigramCount; -}; -} // namespace latinime -#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp deleted file mode 100644 index 254022db4..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" - -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" - -namespace latinime { - -/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition( - const uint8_t *const buffer, int *pos) { - return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h deleted file mode 100644 index e418c4933..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H -#define LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H - -#include <stdint.h> - -#include "defines.h" - -namespace latinime { - -class BufferWithExtendableBuffer; - -class Ver4PatriciaTrieReadingUtils { - public: - static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer, - int *const pos); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils); -}; -} // namespace latinime -#endif /* LATINIME_VER4_PATRICIA_TRIE_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp deleted file mode 100644 index 43227635c..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" - -#include <cstring> -#include <queue> - -#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/file_utils.h" -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" - -namespace latinime { - -void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, - const int unigramCount, const int bigramCount) const { - const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); - BufferWithExtendableBuffer headerBuffer( - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); - const int extendedRegionSize = headerPolicy->getExtendedRegionSize() - + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); - if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, - false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) { - AKLOGE("Cannot write header structure to buffer. 
updatesLastUpdatedTime: %d, " - "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " - "extendedRegionSize: %d", false, false, unigramCount, bigramCount, - extendedRegionSize); - return; - } - mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); -} - -void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, - const char *const dictDirPath) { - const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); - Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( - Ver4DictBuffers::createVer4DictBuffers(headerPolicy)); - int unigramCount = 0; - int bigramCount = 0; - if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) { - return; - } - BufferWithExtendableBuffer headerBuffer( - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); - if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, - true /* updatesLastDecayedTime */, unigramCount, bigramCount, - 0 /* extendedRegionSize */)) { - return; - } - dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); -} - -bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, - const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, - int *const outUnigramCount, int *const outBigramCount) { - Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), - mBuffers->getProbabilityDictContent()); - Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), - mBuffers->getTerminalPositionLookupTable(), headerPolicy); - Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), - mBuffers->getTerminalPositionLookupTable()); - Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), - mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy); - - DynamicPtReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); - 
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPtGcEventListeners - ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - &ptNodeWriter); - if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { - return false; - } - const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - .getValidUnigramCount(); - if (headerPolicy->isDecayingDict() - && unigramCount > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { - if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, - ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC)) { - AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount, - ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC); - return false; - } - } - - readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability - traversePolicyToUpdateBigramProbability(&ptNodeWriter); - if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &traversePolicyToUpdateBigramProbability)) { - return false; - } - const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount(); - if (headerPolicy->isDecayingDict() - && bigramCount > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { - if (!truncateBigrams(ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC)) { - AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, - ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC); - return false; - } - } - - // Mapping from positions in mBuffer to positions in bufferToWrite. 
- PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; - readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy); - DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, - buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); - if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( - &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { - return false; - } - - // Create policy instances for the GCed dictionary. - Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), - buffersToWrite->getProbabilityDictContent()); - Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), - buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); - Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), - buffersToWrite->getTerminalPositionLookupTable()); - Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy); - // Re-assign terminal IDs for valid terminal PtNodes. - TerminalPositionLookupTable::TerminalIdMap terminalIdMap; - if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( - &terminalIdMap)) { - return false; - } - // Run GC for probability dict content. - if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, - mBuffers->getProbabilityDictContent())) { - return false; - } - // Run GC for bigram dict content. - if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap, - mBuffers->getBigramDictContent(), outBigramCount)) { - return false; - } - // Run GC for shortcut dict content. 
- if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap, - mBuffers->getShortcutDictContent())) { - return false; - } - DynamicPtReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(), - &newPtNodeReader); - newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields - traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); - if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( - &traversePolicyToUpdateAllPositionFields)) { - return false; - } - newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds - traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap); - if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) { - return false; - } - *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); - return true; -} - -bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( - const Ver4PatriciaTrieNodeReader *const ptNodeReader, - Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) { - const TerminalPositionLookupTable *const terminalPosLookupTable = - mBuffers->getTerminalPositionLookupTable(); - const int nextTerminalId = terminalPosLookupTable->getNextTerminalId(); - std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator> - priorityQueue; - for (int i = 0; i < nextTerminalId; ++i) { - const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i); - if (terminalPos == NOT_A_DICT_POS) { - continue; - } - const ProbabilityEntry probabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); - const int probability = probabilityEntry.hasHistoricalInfo() ? 
- ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo()) : - probabilityEntry.getProbability(); - priorityQueue.push(DictProbability(terminalPos, probability, - probabilityEntry.getHistoricalInfo()->getTimeStamp())); - } - - // Delete unigrams. - while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) { - const int ptNodePos = priorityQueue.top().getDictPos(); - const PtNodeParams ptNodeParams = - ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) { - AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos); - return false; - } - priorityQueue.pop(); - } - return true; -} - -bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) { - const TerminalPositionLookupTable *const terminalPosLookupTable = - mBuffers->getTerminalPositionLookupTable(); - const int nextTerminalId = terminalPosLookupTable->getNextTerminalId(); - std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator> - priorityQueue; - BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent(); - for (int i = 0; i < nextTerminalId; ++i) { - const int bigramListPos = bigramDictContent->getBigramListHeadPos(i); - if (bigramListPos == NOT_A_DICT_POS) { - continue; - } - bool hasNext = true; - int readingPos = bigramListPos; - while (hasNext) { - const int entryPos = readingPos; - const BigramEntry bigramEntry = - bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); - if (!bigramEntry.isValid()) { - continue; - } - const int probability = bigramEntry.hasHistoricalInfo() ? - ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) : - bigramEntry.getProbability(); - priorityQueue.push(DictProbability(entryPos, probability, - bigramEntry.getHistoricalInfo()->getTimeStamp())); - } - } - - // Delete bigrams. 
- while (static_cast<int>(priorityQueue.size()) > maxBigramCount) { - const int entryPos = priorityQueue.top().getDictPos(); - const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos); - const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry(); - if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) { - AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos); - return false; - } - priorityQueue.pop(); - } - return true; -} - -bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds - ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - if (!ptNodeParams->isTerminal()) { - return true; - } - TerminalPositionLookupTable::TerminalIdMap::const_iterator it = - mTerminalIdMap->find(ptNodeParams->getTerminalId()); - if (it == mTerminalIdMap->end()) { - AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd", - ptNodeParams->getTerminalId(), mTerminalIdMap->size()); - return false; - } - if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) { - AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second); - } - return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h deleted file mode 100644 index c3a155e0e..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H -#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" - -namespace latinime { - -class HeaderPolicy; -class Ver4DictBuffers; -class Ver4PatriciaTrieNodeReader; -class Ver4PatriciaTrieNodeWriter; - -class Ver4PatriciaTrieWritingHelper { - public: - Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) - : mBuffers(buffers) {} - - void writeToDictFile(const char *const dictDirPath, const int unigramCount, - const int bigramCount) const; - - void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper); - - class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds - : public DynamicPtReadingHelper::TraversingEventListener { - public: - TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds( - Ver4PatriciaTrieNodeWriter *const ptNodeWriter, - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap) - : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {} - - bool onAscend() { return true; } - - bool onDescend(const int ptNodeArrayPos) { return true; } - - bool onReadingPtNodeArrayTail() { return true; } - - bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); - - private: - 
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds); - - Ver4PatriciaTrieNodeWriter *const mPtNodeWriter; - const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap; - }; - - // For truncateUnigrams() and truncateBigrams(). - class DictProbability { - public: - DictProbability(const int dictPos, const int probability, const int timestamp) - : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {} - - int getDictPos() const { - return mDictPos; - } - - int getProbability() const { - return mProbability; - } - - int getTimestamp() const { - return mTimestamp; - } - - private: - DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability); - - int mDictPos; - int mProbability; - int mTimestamp; - }; - - // For truncateUnigrams() and truncateBigrams(). - class DictProbabilityComparator { - public: - bool operator()(const DictProbability &left, const DictProbability &right) { - if (left.getProbability() != right.getProbability()) { - return left.getProbability() > right.getProbability(); - } - if (left.getTimestamp() != right.getTimestamp()) { - return left.getTimestamp() < right.getTimestamp(); - } - return left.getDictPos() > right.getDictPos(); - } - - private: - DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator); - }; - - bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, - Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, - int *const outBigramCount); - - bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader, - Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount); - - bool truncateBigrams(const int maxBigramCount); - - Ver4DictBuffers *const mBuffers; -}; -} // namespace latinime - -#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 
259dae4c6..f692882f2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -18,42 +18,11 @@ namespace latinime { -const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; +const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90; // TODO: Needs to allocate larger memory corresponding to the current vector size. const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; -uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const { - const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos); - const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos; - return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer); -} - -uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size, - int *const pos) const { - const int value = readUint(size, *pos); - *pos += size; - return value; -} - -void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount, - int *const outCodePoints, int *outCodePointCount, int *const pos) const { - const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos); - if (readingPosIsInAdditionalBuffer) { - *pos -= mOriginalBufferSize; - } - *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition( - getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos); - if (readingPosIsInAdditionalBuffer) { - *pos += mOriginalBufferSize; - } -} - -bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) { - int writingPos = pos; - return writeUintAndAdvancePosition(data, size, &writingPos); -} - bool 
BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { if (!(size >= 1 && size <= 4)) { @@ -77,7 +46,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data } bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints, - const int codePointCount, const bool writesTerminator, int *const pos) { + const int codePointCount, const bool writesTerminator ,int *const pos) { const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints( codePoints, codePointCount, writesTerminator); if (!checkAndPrepareWriting(*pos, size)) { @@ -131,21 +100,4 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int return true; } -bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) { - int copyingPos = 0; - const int tailPos = sourceBuffer->getTailPosition(); - const int maxDataChunkSize = sizeof(uint32_t); - while (copyingPos < tailPos) { - const int remainingSize = tailPos - copyingPos; - const int copyingSize = (remainingSize >= maxDataChunkSize) ? - maxDataChunkSize : remainingSize; - const uint32_t data = sourceBuffer->readUint(copyingSize, copyingPos); - if (!writeUint(data, copyingSize, copyingPos)) { - return false; - } - copyingPos += copyingSize; - } - return true; -} - } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 76be16518..9dc34823c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -32,20 +32,12 @@ namespace latinime { // raw pointer but provides several methods that handle boundary checking for writing data. 
class BufferWithExtendableBuffer { public: - static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE; - BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, - const int maxAdditionalBufferSize) + const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE) : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} - // Without original buffer. - BufferWithExtendableBuffer(const int maxAdditionalBufferSize) - : mOriginalBuffer(0), mOriginalBufferSize(0), - mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), - mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} - AK_FORCE_INLINE int getTailPosition() const { return mOriginalBufferSize + mUsedAdditionalBufferSize; } @@ -71,13 +63,6 @@ class BufferWithExtendableBuffer { } } - uint32_t readUint(const int size, const int pos) const; - - uint32_t readUintAndAdvancePosition(const int size, int *const pos) const; - - void readCodePointsAndAdvancePosition(const int maxCodePointCount, - int *const outCodePoints, int *outCodePointCount, int *const pos) const; - AK_FORCE_INLINE int getOriginalBufferSize() const { return mOriginalBufferSize; } @@ -93,18 +78,15 @@ class BufferWithExtendableBuffer { * Writing is allowed for original buffer, already written region of additional buffer and the * tail of additional buffer. 
*/ - bool writeUint(const uint32_t data, const int size, const int pos); - bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos); bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount, const bool writesTerminator, int *const pos); - bool copy(const BufferWithExtendableBuffer *const sourceBuffer); - private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); + static const size_t MAX_ADDITIONAL_BUFFER_SIZE; static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index ebdd523e1..0c1576818 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -114,24 +114,6 @@ class ByteArrayUtils { return buffer[(*pos)++]; } - static AK_FORCE_INLINE int readUint(const uint8_t *const buffer, - const int size, const int pos) { - // size must be in 1 to 4. 
- ASSERT(size >= 1 && size <= 4); - switch (size) { - case 1: - return ByteArrayUtils::readUint8(buffer, pos); - case 2: - return ByteArrayUtils::readUint16(buffer, pos); - case 3: - return ByteArrayUtils::readUint24(buffer, pos); - case 4: - return ByteArrayUtils::readUint32(buffer, pos); - default: - return 0; - } - } - /** * Code Point Reading * diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 442373b29..994826fa8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -17,14 +17,12 @@ #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include <cstdio> +#include <cstring> #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" -#include "utils/time_keeper.h" namespace latinime { @@ -32,81 +30,60 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath, const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - TimeKeeper::setCurrentTime(); switch (dictVersion) { - case FormatUtils::VERSION_4: - return createEmptyV4DictFile(filePath, attributeMap); + case 3: + return createEmptyV3DictFile(filePath, attributeMap); default: - AKLOGE("Cannot create dictionary %s because format version %d is not supported.", 
- filePath, dictVersion); + // Only version 3 dictionary is supported for now. return false; } } -/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath, +/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap); - Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = - Ver4DictBuffers::createVer4DictBuffers(&headerPolicy); - headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(), - true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */, - 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); - if (!DynamicPtWritingUtils::writeEmptyDictionary( - dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) { - AKLOGE("Empty ver4 dictionary structure cannot be created on memory."); + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); + headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, + true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */, + 0 /* extendedRegionSize */); + BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; } - return dictBuffers.get()->flush(dirPath); + return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); } /* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { - const int tmpFileNameBufSize = FileUtils::getFilePathWithSuffixBufSize(filePath, - TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + const int tmpFileNameBufSize = strlen(filePath) + + 
strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; // Name of a temporary file used for writing that is a connected string of original name and // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. char tmpFileName[tmpFileNameBufSize]; - FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, - tmpFileNameBufSize, tmpFileName); - if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictHeader)) { - AKLOGE("Dictionary header cannot be written to %s.", tmpFileName); - return false; - } - if (!DictFileWritingUtils::flushBufferToFile(tmpFileName, dictBody)) { - AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName); - return false; - } - if (rename(tmpFileName, filePath) != 0) { - AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);; + snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + FILE *const file = fopen(tmpFileName, "wb"); + if (!file) { + AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + ASSERT(false); return false; } - return true; -} - -/* static */ bool DictFileWritingUtils::flushBufferToFileWithSuffix(const char *const basePath, - const char *const suffix, const BufferWithExtendableBuffer *const buffer) { - const int filePathBufSize = FileUtils::getFilePathWithSuffixBufSize(basePath, suffix); - char filePath[filePathBufSize]; - FileUtils::getFilePathWithSuffix(basePath, suffix, filePathBufSize, filePath); - return flushBufferToFile(filePath, buffer); -} - -/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath, - const BufferWithExtendableBuffer *const buffer) { - FILE *const file = fopen(filePath, "wb"); - if (!file) { - AKLOGE("File %s cannot be opened.", filePath); + // Write the dictionary header. + if (!writeBufferToFile(file, dictHeader)) { + remove(tmpFileName); + AKLOGE("Dictionary header cannnot be written. 
size: %d", dictHeader->getTailPosition()); ASSERT(false); return false; } - if (!writeBufferToFile(file, buffer)) { - remove(filePath); - AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath, - buffer->getTailPosition()); + // Write the dictionary body. + if (!writeBufferToFile(file, dictBody)) { + remove(tmpFileName); + AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); ASSERT(false); return false; } fclose(file); + rename(tmpFileName, filePath); return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h index bdf9fd63c..bd4ac66fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -28,8 +28,6 @@ class BufferWithExtendableBuffer; class DictFileWritingUtils { public: - static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; - static bool createEmptyDictFile(const char *const filePath, const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap); @@ -37,17 +35,13 @@ class DictFileWritingUtils { BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody); - static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix, - const BufferWithExtendableBuffer *const buffer); - private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils); - static bool createEmptyV4DictFile(const char *const filePath, - const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; - static bool flushBufferToFile(const char *const filePath, - const BufferWithExtendableBuffer *const buffer); + static bool createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); static bool 
writeBufferToFile(FILE *const file, const BufferWithExtendableBuffer *const buffer); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp deleted file mode 100644 index 1f25cfa1e..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/utils/file_utils.h" - -#include <cstdio> -#include <cstring> -#include <dirent.h> -#include <fcntl.h> -#include <libgen.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> - -namespace latinime { - -// Returns -1 on error. -/* static */ int FileUtils::getFileSize(const char *const filePath) { - const int fd = open(filePath, O_RDONLY); - if (fd == -1) { - return -1; - } - struct stat statBuf; - if (fstat(fd, &statBuf) != 0) { - close(fd); - return -1; - } - close(fd); - return static_cast<int>(statBuf.st_size); -} - -/* static */ bool FileUtils::existsDir(const char *const dirPath) { - DIR *const dir = opendir(dirPath); - if (dir == NULL) { - return false; - } - closedir(dir); - return true; -} - -// Remove a directory and all files in the directory. 
-/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) { - DIR *const dir = opendir(dirPath); - if (dir == NULL) { - AKLOGE("Cannot open dir %s.", dirPath); - return true; - } - struct dirent *dirent; - while ((dirent = readdir(dir)) != NULL) { - if (dirent->d_type != DT_REG) { - continue; - } - const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name); - char filePath[filePathBufSize]; - getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath); - if (remove(filePath) != 0) { - AKLOGE("Cannot remove file %s.", filePath); - closedir(dir); - return false; - } - } - closedir(dir); - if (remove(dirPath) != 0) { - AKLOGE("Cannot remove directory %s.", dirPath); - return false; - } - return true; -} - -/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath, - const char *const suffix) { - return strlen(filePath) + strlen(suffix) + 1 /* terminator */; -} - -/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath, - const char *const suffix, const int filePathBufSize, char *const outFilePath) { - snprintf(outFilePath, filePathBufSize, "%s%s", filePath, suffix); -} - -/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath, - const char *const fileName) { - return strlen(dirPath) + 1 /* '/' */ + strlen(fileName) + 1 /* terminator */; -} - -/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName, - const int filePathBufSize, char *const outFilePath) { - snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName); -} - -/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath, - const char *const suffix, const int outDirPathBufSize, char *const outDirPath) { - const int filePathLength = strlen(filePath); - const int suffixLength = strlen(suffix); - if (filePathLength <= suffixLength) { - AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).", - filePath, filePathLength, suffix, 
suffixLength); - return false; - } - const int resultFilePathLength = filePathLength - suffixLength; - if (outDirPathBufSize <= resultFilePathLength) { - AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d", - filePath, suffix, outDirPathBufSize); - return false; - } - if (strncmp(filePath + resultFilePathLength, suffix, suffixLength) != 0) { - AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix); - return false; - } - snprintf(outDirPath, resultFilePathLength + 1 /* terminator */, "%s", filePath); - return true; -} - -/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize, - char *const outDirPath) { - for (int i = strlen(filePath) - 1; i >= 0; --i) { - if (filePath[i] == '/') { - if (i >= outDirPathBufSize) { - AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d", - filePath, outDirPathBufSize); - ASSERT(false); - return; - } - snprintf(outDirPath, i + 1 /* terminator */, "%s", filePath); - return; - } - } -} - -/* static */ void FileUtils::getBasename(const char *const filePath, - const int outNameBufSize, char *const outName) { - const int filePathBufSize = strlen(filePath) + 1 /* terminator */; - char filePathBuf[filePathBufSize]; - snprintf(filePathBuf, filePathBufSize, "%s", filePath); - const char *const baseName = basename(filePathBuf); - const int baseNameLength = strlen(baseName); - if (baseNameLength >= outNameBufSize) { - AKLOGE("outNameBufSize is too small. 
filePath: %s, outNameBufSize: %d", - filePath, outNameBufSize); - return; - } - snprintf(outName, baseNameLength + 1 /* terminator */, "%s", baseName); -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h deleted file mode 100644 index 3e84a3038..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_FILE_UTILS_H -#define LATINIME_FILE_UTILS_H - -#include "defines.h" - -namespace latinime { - -class FileUtils { - public: - // Returns -1 on error. - static int getFileSize(const char *const filePath); - - static bool existsDir(const char *const dirPath); - - // Remove a directory and all files in the directory. 
- static bool removeDirAndFiles(const char *const dirPath); - - static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix); - - static void getFilePathWithSuffix(const char *const filePath, const char *const suffix, - const int filePathBufSize, char *const outFilePath); - - static int getFilePathBufSize(const char *const dirPath, const char *const fileName); - - static void getFilePath(const char *const dirPath, const char *const fileName, - const int filePathBufSize, char *const outFilePath); - - // Returns whether the filePath have the suffix. - static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix, - const int dirPathBufSize, char *const outDirPath); - - static void getDirPath(const char *const filePath, const int dirPathBufSize, - char *const outDirPath); - - static void getBasename(const char *const filePath, const int outNameBufSize, - char *const outName); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils); -}; -} // namespace latinime -#endif /* LATINIME_FILE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index 4050ad363..1632fd072 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" - #include <cmath> +#include <ctime> #include <stdlib.h> +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" -#include "utils/time_keeper.h" namespace latinime { @@ -31,86 +31,76 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; +const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15; +const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3; +const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1; +// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected +// duration of the decay is approximately 66hours. +const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; -const int ForgettingCurveUtils::MAX_LEVEL = 3; -const int ForgettingCurveUtils::MAX_COUNT = 3; -const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1; -const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60; -const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15; -const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14; - const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; +ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper; -/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo( - const HistoricalInfo *const originalHistoricalInfo, - const int newProbability, const int timestamp) { - if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { - return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */); - } else if 
(!originalHistoricalInfo->isValid()) { - // Initial information. - return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */); - } else { - const int updatedCount = originalHistoricalInfo->getCount() + 1; - if (updatedCount > MAX_COUNT) { - // The count exceeds the max value the level can be incremented. - if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) { - // The level is already max. - return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), - originalHistoricalInfo->getCount()); - } else { - // Level up. - return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1, - 0 /* count */); - } - } else { - return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount); - } - } -} - -/* static */ int ForgettingCurveUtils::decodeProbability( - const HistoricalInfo *const historicalInfo) { - const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp()); - return sProbabilityTable.getProbability(historicalInfo->getLevel(), - min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT)); +void ForgettingCurveUtils::TimeKeeper::setCurrentTime() { + mCurrentTime = time(0); } -/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability, - const int bigramProbability) { - if (unigramProbability == NOT_A_PROBABILITY) { +/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, + const int encodedBigramProbability) { + if (encodedUnigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY); + } else if (encodedBigramProbability == NOT_A_PROBABILITY) { + return backoff(decodeProbability(encodedUnigramProbability)); } else { + const int unigramProbability = decodeProbability(encodedUnigramProbability); + const int bigramProbability = decodeProbability(encodedBigramProbability); return min(max(unigramProbability, 
bigramProbability), MAX_COMPUTED_PROBABILITY); } } -/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) { - return historicalInfo->getLevel() > 0 - || getElapsedTimeStepCount(historicalInfo->getTimeStamp()) - < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; +// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding +// (i.e. unigram probability + bigram probability delta). +/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability( + const int originalEncodedProbability, const int newProbability) { + if (originalEncodedProbability == NOT_A_PROBABILITY) { + // The bigram relation is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The bigram target is not in other dictionaries. + return 0; + } else { + return MIN_VALID_ENCODED_PROBABILITY; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) { + return MIN_VALID_ENCODED_PROBABILITY; + } + return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY); + } +} + +/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) { + return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; } -/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave( - const HistoricalInfo *const originalHistoricalInfo) { - if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) { - return HistoricalInfo(); - } - const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp()); - if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) { - // No need to update historical info. 
- return *originalHistoricalInfo; +/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability, + const DictionaryHeaderStructurePolicy *const headerPolicy) { + const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime(); + const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1); + int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); + // TODO: Implement the decay in more proper way. + for (int i = 0; i < decayIterationCount; ++i) { + const float currentRate = static_cast<float>(currentEncodedProbability) + / static_cast<float>(MAX_ENCODED_PROBABILITY); + const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate; + const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); + if (thresholdToDecay < randValue) { + currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, + 0); + } } - // Level down. - const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1); - const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ? - originalHistoricalInfo->getLevel() : maxLevelDownAmonut; - const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() + - levelDownAmount * (MAX_ELAPSED_TIME_STEP_COUNT + 1) * TIME_STEP_DURATION_IN_SECONDS; - return HistoricalInfo(adjustedTimestamp, - originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */); + return currentEncodedProbability; } /* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay, @@ -126,14 +116,21 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT if (mindsBlockByDecay) { return false; } - if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS - < TimeKeeper::peekCurrentTime()) { + if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) { // Time to decay. 
return true; } return false; } +/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) { + if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) { + return NOT_A_PROBABILITY; + } else { + return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY); + } +} + // See comments in ProbabilityUtils::backoff(). /* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { if (unigramProbability == NOT_A_PROBABILITY) { @@ -143,29 +140,15 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT } } -/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) { - return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS; -} - ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() { - mTable.resize(MAX_LEVEL + 1); - for (int level = 0; level <= MAX_LEVEL; ++level) { - mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1); - const float initialProbability = - static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level))); - for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) { - if (level == 0) { - mTable[level][timeStepCount] = NOT_A_PROBABILITY; - continue; - } - const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS; - const float probability = initialProbability - * powf(2.0f, -1.0f * static_cast<float>(elapsedTime) - / static_cast<float>(TIME_STEP_DURATION_IN_SECONDS - * (MAX_ELAPSED_TIME_STEP_COUNT + 1))); - mTable[level][timeStepCount] = - min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY); - } + // Table entry is as follows: + // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127. + // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used. 
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1); + for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) { + const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY), + static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY))); + mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability)); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 6ac8dc528..2ad423874 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -20,33 +20,45 @@ #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" namespace latinime { class DictionaryHeaderStructurePolicy; +// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is +// required to introduced to each terminal PtNode and bigram entry. // TODO: Quit using bigram probability to indicate the delta. 
class ForgettingCurveUtils { public: + class TimeKeeper { + public: + TimeKeeper() : mCurrentTime(0) {} + void setCurrentTime(); + int peekCurrentTime() const { return mCurrentTime; }; + + private: + DISALLOW_COPY_AND_ASSIGN(TimeKeeper); + + int mCurrentTime; + }; + static const int MAX_UNIGRAM_COUNT; static const int MAX_UNIGRAM_COUNT_AFTER_GC; static const int MAX_BIGRAM_COUNT; static const int MAX_BIGRAM_COUNT_AFTER_GC; - static const HistoricalInfo createUpdatedHistoricalInfo( - const HistoricalInfo *const originalHistoricalInfo, const int newProbability, - const int timestamp); - - static const HistoricalInfo createHistoricalInfoToSave( - const HistoricalInfo *const originalHistoricalInfo); - - static int decodeProbability(const HistoricalInfo *const historicalInfo); + static TimeKeeper sTimeKeeper; static int getProbability(const int encodedUnigramProbability, const int encodedBigramProbability); - static bool needsToKeep(const HistoricalInfo *const historicalInfo); + static int getUpdatedEncodedProbability(const int originalEncodedProbability, + const int newProbability); + + static int isValidEncodedProbability(const int encodedProbability); + + static int getEncodedProbabilityToSave(const int encodedProbability, + const DictionaryHeaderStructurePolicy *const headerPolicy); static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount, const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy); @@ -58,32 +70,31 @@ class ForgettingCurveUtils { public: ProbabilityTable(); - int getProbability(const int level, const int elapsedTimeStepCount) const { - return mTable[level][elapsedTimeStepCount]; + int getProbability(const int encodedProbability) const { + if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) { + return NOT_A_PROBABILITY; + } + return mTable[encodedProbability]; } private: DISALLOW_COPY_AND_ASSIGN(ProbabilityTable); - std::vector<std::vector<int> > mTable; + std::vector<int> 
mTable; }; static const int MAX_COMPUTED_PROBABILITY; + static const int MAX_ENCODED_PROBABILITY; + static const int MIN_VALID_ENCODED_PROBABILITY; + static const int ENCODED_PROBABILITY_STEP; + static const float MIN_PROBABILITY_TO_DECAY; static const int DECAY_INTERVAL_SECONDS; - static const int MAX_LEVEL; - static const int MAX_COUNT; - static const int MIN_VALID_LEVEL; - static const int TIME_STEP_DURATION_IN_SECONDS; - static const int MAX_ELAPSED_TIME_STEP_COUNT; - static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; - static const int HALF_LIFE_TIME_IN_SECONDS; - static const ProbabilityTable sProbabilityTable; - static int backoff(const int unigramProbability); + static int decodeProbability(const int encodedProbability); - static int getElapsedTimeStepCount(const int timestamp); + static int backoff(const int unigramProbability); }; } // namespace latinime #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index cd3c403fa..1d77d5c27 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -41,13 +41,10 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; // Dictionary format version number (2 bytes) // Options (2 bytes) // Header size (4 bytes) : integer, big endian - // Conceptually this converts the hardcoded value of the bytes in the file into - // the symbolic value we use in the code. But we want the constants to be the - // same so we use them for both here. 
- if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) { + if (ByteArrayUtils::readUint16(dict, 4) == 2) { return VERSION_2; - } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) { - return VERSION_4; + } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { + return VERSION_3; } else { return UNKNOWN_VERSION; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index eb2227d60..79ed0de29 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -29,10 +29,9 @@ namespace latinime { class FormatUtils { public: enum FORMAT_VERSION { - // These MUST have the same values as the relevant constants in FormatSpec.java. - VERSION_2 = 2, - VERSION_4 = 400, - UNKNOWN_VERSION = -1 + VERSION_2, + VERSION_3, + UNKNOWN_VERSION }; // 32 bit magic number is stored at the beginning of the dictionary header to reject diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h deleted file mode 100644 index 428ca8626..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_HISTORICAL_INFO_H -#define LATINIME_HISTORICAL_INFO_H - -#include "defines.h" - -namespace latinime { - -class HistoricalInfo { - public: - // Invalid historical info. - HistoricalInfo() - : mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0) {} - - HistoricalInfo(const int timestamp, const int level, const int count) - : mTimestamp(timestamp), mLevel(level), mCount(count) {} - - bool isValid() const { - return mTimestamp != NOT_A_TIMESTAMP; - } - - int getTimeStamp() const { - return mTimestamp; - } - - int getLevel() const { - return mLevel; - } - - int getCount() const { - return mCount; - } - - private: - // Copy constructor is public to use this class as a type of return value. - DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo); - - const int mTimestamp; - const int mLevel; - const int mCount; -}; -} // namespace latinime -#endif /* LATINIME_HISTORICAL_INFO_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp deleted file mode 100644 index e88d6e0a9..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" - -#include <cerrno> -#include <climits> -#include <cstdio> -#include <fcntl.h> -#include <sys/mman.h> -#include <unistd.h> - -#include "suggest/policyimpl/dictionary/utils/file_utils.h" - -namespace latinime { - -/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer( - const char *const path, const int bufferOffset, const int bufferSize, - const bool isUpdatable) { - const int mmapFd = open(path, O_RDONLY); - if (mmapFd < 0) { - AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno); - return MmappedBufferPtr(0); - } - const int pagesize = sysconf(_SC_PAGESIZE); - const int offset = bufferOffset % pagesize; - int alignedOffset = bufferOffset - offset; - int alignedSize = bufferSize + offset; - const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ; - void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd, - alignedOffset); - if (mmappedBuffer == MAP_FAILED) { - AKLOGE("DICT: Can't mmap dictionary. 
errno=%d", errno); - close(mmapFd); - return MmappedBufferPtr(0); - } - uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset; - if (!buffer) { - AKLOGE("DICT: buffer is null"); - close(mmapFd); - return MmappedBufferPtr(0); - } - return MmappedBufferPtr(new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, - mmapFd, isUpdatable)); -} - -/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer( - const char *const path, const bool isUpdatable) { - const int fileSize = FileUtils::getFileSize(path); - if (fileSize == -1) { - return MmappedBufferPtr(0); - } else if (fileSize == 0) { - return MmappedBufferPtr(new MmappedBuffer(isUpdatable)); - } else { - return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable); - } -} - -/* static */ MmappedBuffer::MmappedBufferPtr MmappedBuffer::openBuffer( - const char *const dirPath, const char *const fileName, const bool isUpdatable) { - const int filePathBufferSize = PATH_MAX + 1 /* terminator */; - char filePath[filePathBufferSize]; - const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath, - fileName); - if (filePathLength >= filePathBufferSize) { - return 0; - } - return openBuffer(filePath, isUpdatable); -} - -MmappedBuffer::~MmappedBuffer() { - if (mAlignedSize == 0) { - return; - } - int ret = munmap(mMmappedBuffer, mAlignedSize); - if (ret != 0) { - AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); - } - ret = close(mMmapFd); - if (ret != 0) { - AKLOGE("DICT: Failure in close. 
ret=%d errno=%d", ret, errno); - } -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 73a733b0c..6b69116eb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -17,27 +17,58 @@ #ifndef LATINIME_MMAPPED_BUFFER_H #define LATINIME_MMAPPED_BUFFER_H +#include <cerrno> +#include <fcntl.h> #include <stdint.h> +#include <sys/mman.h> +#include <unistd.h> #include "defines.h" -#include "utils/exclusive_ownership_pointer.h" namespace latinime { class MmappedBuffer { public: - typedef ExclusiveOwnershipPointer<MmappedBuffer> MmappedBufferPtr; - - static MmappedBufferPtr openBuffer(const char *const path, - const int bufferOffset, const int bufferSize, const bool isUpdatable); - - // Mmap entire file. - static MmappedBufferPtr openBuffer(const char *const path, const bool isUpdatable); - - static MmappedBufferPtr openBuffer(const char *const dirPath, const char *const fileName, - const bool isUpdatable); + static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset, + const int bufferSize, const bool isUpdatable) { + const int openMode = isUpdatable ? O_RDWR : O_RDONLY; + const int mmapFd = open(path, openMode); + if (mmapFd < 0) { + AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno); + return 0; + } + const int pagesize = getpagesize(); + const int offset = bufferOffset % pagesize; + int alignedOffset = bufferOffset - offset; + int alignedSize = bufferSize + offset; + const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ; + void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd, + alignedOffset); + if (mmappedBuffer == MAP_FAILED) { + AKLOGE("DICT: Can't mmap dictionary. 
errno=%d", errno); + close(mmapFd); + return 0; + } + uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset; + if (!buffer) { + AKLOGE("DICT: buffer is null"); + close(mmapFd); + return 0; + } + return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd, + isUpdatable); + } - ~MmappedBuffer(); + ~MmappedBuffer() { + int ret = munmap(mMmappedBuffer, mAlignedSize); + if (ret != 0) { + AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); + } + ret = close(mMmapFd); + if (ret != 0) { + AKLOGE("DICT: Failure in close. ret=%d errno=%d", ret, errno); + } + } AK_FORCE_INLINE uint8_t *getBuffer() const { return mBuffer; @@ -58,11 +89,6 @@ class MmappedBuffer { : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer), mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {} - // Empty file. We have to handle an empty file as a valid part of a dictionary. - AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable) - : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0), - mIsUpdatable(isUpdatable) {} - DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer); uint8_t *const mBuffer; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp deleted file mode 100644 index 4ad82f9f7..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/utils/sparse_table.h" - -namespace latinime { - -const int SparseTable::NOT_EXIST = -1; -const int SparseTable::INDEX_SIZE = 4; - -bool SparseTable::contains(const int id) const { - const int readingPos = getPosInIndexTable(id); - if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) { - return false; - } - const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos); - return index != NOT_EXIST; -} - -uint32_t SparseTable::get(const int id) const { - const int indexTableReadingPos = getPosInIndexTable(id); - const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos); - const int contentTableReadingPos = getPosInContentTable(id, index); - return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos); -} - -bool SparseTable::set(const int id, const uint32_t value) { - const int posInIndexTable = getPosInIndexTable(id); - // Extends the index table if needed. - if (mIndexTableBuffer->getTailPosition() < posInIndexTable) { - int tailPos = mIndexTableBuffer->getTailPosition(); - while(tailPos < posInIndexTable) { - if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) { - AKLOGE("cannot extend index table. tailPos: %d to: %d", tailPos, posInIndexTable); - return false; - } - } - } - if (contains(id)) { - // The entry is already in the content table. 
- const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable); - if (!mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index))) { - AKLOGE("cannot update value %d. pos: %d, tailPos: %d, mDataSize: %d", value, - getPosInContentTable(id, index), mContentTableBuffer->getTailPosition(), - mDataSize); - return false; - } - return true; - } - // The entry is not in the content table. - // Create new entry in the content table. - const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition()); - if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) { - AKLOGE("cannot write index %d. pos %d", index, posInIndexTable); - return false; - } - // Write a new block that containing the entry to be set. - int writingPos = getPosInContentTable(0 /* id */, index); - for (int i = 0; i < mBlockSize; ++i) { - if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize, - &writingPos)) { - AKLOGE("cannot write content table to extend. 
writingPos: %d, tailPos: %d, " - "mDataSize: %d", writingPos, mContentTableBuffer->getTailPosition(), mDataSize); - return false; - } - } - return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); -} - -int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const { - return contentTablePos / mDataSize / mBlockSize; -} - -int SparseTable::getPosInIndexTable(const int id) const { - return (id / mBlockSize) * INDEX_SIZE; -} - -int SparseTable::getPosInContentTable(const int id, const int index) const { - const int offset = id % mBlockSize; - return (index * mBlockSize + offset) * mDataSize; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h deleted file mode 100644 index 21c167506..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_SPARSE_TABLE_H -#define LATINIME_SPARSE_TABLE_H - -#include <stdint.h> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -// Note that there is a corresponding implementation in SparseTable.java. -// TODO: Support multiple content buffers. 
-class SparseTable { - public: - SparseTable(BufferWithExtendableBuffer *const indexTableBuffer, - BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize, - const int dataSize) - : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer), - mBlockSize(blockSize), mDataSize(dataSize) {} - - bool contains(const int id) const; - - uint32_t get(const int id) const; - - bool set(const int id, const uint32_t value); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable); - - int getIndexFromContentTablePos(const int contentTablePos) const; - - int getPosInIndexTable(const int id) const; - - int getPosInContentTable(const int id, const int index) const; - - static const int NOT_EXIST; - static const int INDEX_SIZE; - - BufferWithExtendableBuffer *const mIndexTableBuffer; - BufferWithExtendableBuffer *const mContentTableBuffer; - const int mBlockSize; - const int mDataSize; -}; -} // namespace latinime -#endif /* LATINIME_SPARSE_TABLE_H */ diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index fd0ac9eb6..007c19e0a 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -81,7 +81,7 @@ class TypingTraversal : public Traversal { return false; } const int point0Index = dicNode->getInputIndex(0); - return dicNode->isTerminalDicNode() + return dicNode->isTerminalWordNode() && traverseSession->getProximityInfoState(0)-> hasSpaceProximity(point0Index); } @@ -96,7 +96,7 @@ class TypingTraversal : public Traversal { if (dicNode->isCompletion(inputSize)) { return false; } - if (!dicNode->isTerminalDicNode()) { + if (!dicNode->isTerminalWordNode()) { return false; } const int16_t pointIndex = dicNode->getInputIndex(0); diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp index 
54f65c786..5b6b5e874 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp @@ -23,64 +23,39 @@ namespace latinime { const TypingWeighting TypingWeighting::sInstance; -ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType, +ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, const DicNode *const dicNode) const { switch (correctionType) { case CT_MATCH: if (isProximityDicNode(traverseSession, dicNode)) { - return ErrorTypeUtils::PROXIMITY_CORRECTION; - } else if (dicNode->isInDigraph()) { - return ErrorTypeUtils::MATCH_WITH_DIGRAPH; + return ET_PROXIMITY_CORRECTION; } else { - // Compare the node code point with original primary code point on the keyboard. - const ProximityInfoState *const pInfoState = - traverseSession->getProximityInfoState(0); - const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt( - dicNode->getInputIndex(0)); - const int nodeCodePoint = dicNode->getNodeCodePoint(); - if (primaryOriginalCodePoint == nodeCodePoint) { - // Node code point is same as original code point on the keyboard. - return ErrorTypeUtils::NOT_AN_ERROR; - } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) == - CharUtils::toLowerCase(nodeCodePoint)) { - // Only cases of the code points are different. - return ErrorTypeUtils::MATCH_WITH_CASE_ERROR; - } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) == - CharUtils::toBaseCodePoint(nodeCodePoint)) { - // Node code point is a variant of original code point. - return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR; - } else { - // Node code point is a variant of original code point and the cases are also - // different. 
- return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR - | ErrorTypeUtils::MATCH_WITH_CASE_ERROR; - } + return ET_NOT_AN_ERROR; } - break; case CT_ADDITIONAL_PROXIMITY: - return ErrorTypeUtils::PROXIMITY_CORRECTION; + return ET_PROXIMITY_CORRECTION; case CT_OMISSION: if (parentDicNode->canBeIntentionalOmission()) { - return ErrorTypeUtils::INTENTIONAL_OMISSION; + return ET_INTENTIONAL_OMISSION; } else { - return ErrorTypeUtils::EDIT_CORRECTION; + return ET_EDIT_CORRECTION; } break; case CT_SUBSTITUTION: case CT_INSERTION: case CT_TERMINAL_INSERTION: case CT_TRANSPOSITION: - return ErrorTypeUtils::EDIT_CORRECTION; + return ET_EDIT_CORRECTION; case CT_NEW_WORD_SPACE_OMISSION: case CT_NEW_WORD_SPACE_SUBSTITUTION: - return ErrorTypeUtils::NEW_WORD; + return ET_NEW_WORD; case CT_TERMINAL: - return ErrorTypeUtils::NOT_AN_ERROR; + return ET_NOT_AN_ERROR; case CT_COMPLETION: - return ErrorTypeUtils::COMPLETION; + return ET_COMPLETION; default: - return ErrorTypeUtils::NOT_AN_ERROR; + return ET_NOT_AN_ERROR; } } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 41314ef52..9f0a331e3 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -19,7 +19,6 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" -#include "suggest/core/dictionary/error_type_utils.h" #include "suggest/core/layout/touch_position_correction_utils.h" #include "suggest/core/policy/weighting.h" #include "suggest/core/session/dic_traverse_session.h" @@ -205,7 +204,7 @@ class TypingWeighting : public Weighting { return cost * traverseSession->getMultiWordCostMultiplier(); } - ErrorTypeUtils::ErrorType getErrorType(const CorrectionType correctionType, + ErrorType getErrorType(const CorrectionType correctionType, const DicTraverseSession *const traverseSession, const DicNode *const parentDicNode, 
const DicNode *const dicNode) const; diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h deleted file mode 100644 index 081802e8b..000000000 --- a/native/jni/src/utils/exclusive_ownership_pointer.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H -#define LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H - -#include "defines.h" - -namespace latinime { - -template<class T> -class ExclusiveOwnershipPointer { - public: - // This instance become an owner of the raw pointer. - AK_FORCE_INLINE ExclusiveOwnershipPointer(T *const rawPointer) - : mPointer(rawPointer), - mSharedOwnerPtr(new (ExclusiveOwnershipPointer<T> *)(this)) {} - - // Move the ownership. - AK_FORCE_INLINE ExclusiveOwnershipPointer(const ExclusiveOwnershipPointer<T> &pointer) - : mPointer(pointer.mPointer), mSharedOwnerPtr(pointer.mSharedOwnerPtr) { - transferOwnership(&pointer); - } - - AK_FORCE_INLINE ~ExclusiveOwnershipPointer() { - deletePointersIfHavingOwnership(); - } - - AK_FORCE_INLINE T *get() const { - return mPointer; - } - - private: - // This class allows to copy and ensures only one instance has the ownership of the - // managed pointer. 
- DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer); - DISALLOW_ASSIGNMENT_OPERATOR(ExclusiveOwnershipPointer); - - void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) { - if (*mSharedOwnerPtr != src) { - AKLOGE("Failed to transfer the ownership because src is not the current owner." - "src: %p, owner: %p", src, *mSharedOwnerPtr); - ASSERT(false); - return; - } - // Transfer the ownership from src to this instance. - *mSharedOwnerPtr = this; - } - - void deletePointersIfHavingOwnership() { - if (mSharedOwnerPtr && *mSharedOwnerPtr == this) { - if (mPointer) { - if (DEBUG_DICT) { - AKLOGI("Releasing pointer: %p", mPointer); - } - delete mPointer; - } - delete mSharedOwnerPtr; - } - } - - T *mPointer; - // mSharedOwnerPtr points a shared memory space where the instance which has the ownership is - // stored. - ExclusiveOwnershipPointer<T> **mSharedOwnerPtr; -}; -} // namespace latinime -#endif /* LATINIME_EXCLUSIVE_OWNERSHIP_POINTER_H */ diff --git a/native/jni/src/utils/time_keeper.cpp b/native/jni/src/utils/time_keeper.cpp deleted file mode 100644 index 026284060..000000000 --- a/native/jni/src/utils/time_keeper.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "utils/time_keeper.h" - -#include <ctime> - -namespace latinime { - -int TimeKeeper::sCurrentTime; -bool TimeKeeper::sSetForTesting; - -/* static */ void TimeKeeper::setCurrentTime() { - if (!sSetForTesting) { - sCurrentTime = time(0); - } -} - -/* static */ void TimeKeeper::startTestModeWithForceCurrentTime(const int currentTime) { - sCurrentTime = currentTime; - sSetForTesting = true; -} - -/* static */ void TimeKeeper::stopTestMode() { - sSetForTesting = false; -} - -} // namespace latinime diff --git a/native/jni/src/utils/time_keeper.h b/native/jni/src/utils/time_keeper.h deleted file mode 100644 index d066757e4..000000000 --- a/native/jni/src/utils/time_keeper.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_TIME_KEEPER_H -#define LATINIME_TIME_KEEPER_H - -#include "defines.h" - -namespace latinime { - -class TimeKeeper { - public: - static void setCurrentTime(); - - static void startTestModeWithForceCurrentTime(const int currentTime); - - static void stopTestMode(); - - static int peekCurrentTime() { return sCurrentTime; }; - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TimeKeeper); - - static int sCurrentTime; - static bool sSetForTesting; -}; -} // namespace latinime -#endif /* LATINIME_TIME_KEEPER_H */ |