diff options
Diffstat (limited to 'native/jni/src')
20 files changed, 174 insertions, 55 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index c920f64b4..742e388e4 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -298,9 +298,19 @@ static inline void prof_out(void) { #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define NOT_A_DICT_POS (S_INT_MIN) + // A special value to mean the first word confidence makes no sense in this case, // e.g. this is not a multi-word suggestion. -#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN) +#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MAX) +// How high the confidence needs to be for us to auto-commit. Arbitrary. +// This needs to be the same as CONFIDENCE_FOR_AUTO_COMMIT in BinaryDictionary.java +#define CONFIDENCE_FOR_AUTO_COMMIT (1000000) +// 80% of the full confidence +#define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) +// 100% of the full confidence +#define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT) +// 80% of the full confidence +#define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) #define KEYCODE_SPACE ' ' #define KEYCODE_SINGLE_QUOTE '\'' diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 9099e8285..49cfdecac 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -271,7 +271,7 @@ class DicNode { return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; } - bool shouldBeFilterdBySafetyNetForBigram() const { + bool shouldBeFilteredBySafetyNetForBigram() const { const uint16_t currentDepth = getNodeCodePointCount(); const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1; @@ -321,6 +321,16 @@ class DicNode { DUMP_WORD_AND_SCORE("OUTPUT"); } + // "Total" in this context (and other methods in this class) means the whole suggestion. 
When + // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only + // the one that corresponds to the last word of the suggestion, and all the previous words + // are concatenated together in mPrevWord - which contains a space at the end. + int getTotalNodeSpaceCount() const { + if (isFirstWord()) return 0; + return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()); + } + int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); if (inputIndex == NOT_AN_INDEX) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 5969b31cc..59ead1894 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -129,7 +129,7 @@ bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { } void Dictionary::getProperty(const char *const query, char *const outResult, - const int maxResultLength) const { + const int maxResultLength) { return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); } diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 43d3b964d..0195d5bf0 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -84,7 +84,7 @@ class Dictionary { bool needsToRunGC(const bool mindsBlockByGC); void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const; + const int maxResultLength); const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy; diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.cpp 
b/native/jni/src/suggest/core/layout/proximity_info_params.cpp index 0e887f700..49df10301 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_params.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_params.cpp @@ -69,13 +69,13 @@ const float ProximityInfoParams::STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180 const float ProximityInfoParams::SKIP_CORNER_PROBABILITY = 0.4f; const float ProximityInfoParams::SPEED_MARGIN = 0.1f; const float ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f; -// TODO: The variance is critical for accuracy; thus, adjusting these parameter by machine +// TODO: The variance is critical for accuracy; thus, adjusting these parameters by machine // learning or something would be efficient. -const float ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION = 0.3f; -const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION = 0.25f; -const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION = 0.5f; -const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION = 0.15f; -const float ProximityInfoParams::MIN_STANDERD_DIVIATION = 0.37f; +const float ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION = 0.3f; +const float ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION = 0.25f; +const float ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION = 0.5f; +const float ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION = 0.15f; +const float ProximityInfoParams::MIN_STANDARD_DEVIATION = 0.37f; const float ProximityInfoParams::PREV_DISTANCE_WEIGHT = 0.5f; const float ProximityInfoParams::NEXT_DISTANCE_WEIGHT = 0.6f; diff --git a/native/jni/src/suggest/core/layout/proximity_info_params.h b/native/jni/src/suggest/core/layout/proximity_info_params.h index 4e47f7308..ae1f82c22 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_params.h +++ 
b/native/jni/src/suggest/core/layout/proximity_info_params.h @@ -73,11 +73,11 @@ class ProximityInfoParams { static const float SKIP_CORNER_PROBABILITY; static const float SPEED_MARGIN; static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION; - static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION; - static const float MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION; - static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION; - static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION; - static const float MIN_STANDERD_DIVIATION; + static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION; + static const float MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION; + static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION; + static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION; + static const float MIN_STANDARD_DEVIATION; static const float PREV_DISTANCE_WEIGHT; static const float NEXT_DISTANCE_WEIGHT; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp index 904671f7f..e1b35340b 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.cpp @@ -708,13 +708,13 @@ namespace latinime { const float inputCharProbability = 1.0f - skipProbability; const float speedxAngleRate = min(speedRate * currentAngle / M_PI_F - * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION, - ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION); + * ProximityInfoParams::SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION, + ProximityInfoParams::MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION); const float speedxNearestKeyDistanceRate = min(speedRate * nearestKeyDistance - * ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION, - ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION); + * 
ProximityInfoParams::SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION, + ProximityInfoParams::MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION); const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate - + ProximityInfoParams::MIN_STANDERD_DIVIATION; + + ProximityInfoParams::MIN_STANDARD_DEVIATION; ProximityInfoUtils::NormalDistribution distribution(ProximityInfoParams::CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index c7ffef0d5..41f82049f 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -80,8 +80,10 @@ class DictionaryStructureWithBufferPolicy { virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0; + // Currently, this method is used only for testing. You may want to consider creating new + // dedicated method instead of this if you want to use this in the production. virtual void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const = 0; + const int maxResultLength) = 0; protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 51cfba17a..73ccebc88 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -166,7 +166,11 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // TODO: have partial commit work even with multiple pointers. 
const bool outputSecondWordFirstLetterInputIndex = traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); - outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(); + if (terminalSize > 0) { + // If we have no suggestions, don't write this + outputAutoCommitFirstWordConfidence[0] = + computeFirstWordConfidence(&terminals[0]); + } // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; @@ -255,9 +259,55 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen return outputWordIndex; } -int Suggest::computeFirstWordConfidence() const { - // TODO: implement this. - return NOT_A_FIRST_WORD_CONFIDENCE; +int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const { + // Get the number of spaces in the first suggestion + const int spaceCount = terminalDicNode->getTotalNodeSpaceCount(); + // Get the number of characters in the first suggestion + const int length = terminalDicNode->getTotalNodeCodePointCount(); + // Get the distance for the first word of the suggestion + const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord(); + + // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000. + // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or + // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means + // we are very confident. 
+ // Expected space count is 1 ~ 5 + static const int MIN_EXPECTED_SPACE_COUNT = 1; + static const int MAX_EXPECTED_SPACE_COUNT = 5; + // Expected length is about 4 ~ 30 + static const int MIN_EXPECTED_LENGTH = 4; + static const int MAX_EXPECTED_LENGTH = 30; + // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0 + static const float MIN_EXPECTED_DISTANCE = 0.0; + static const float MAX_EXPECTED_DISTANCE = 2.0; + // This is not strict: it's where most stuff will be falling, but it's still fine if it's + // outside these values. We want to output a value that reflects all of these. Each factor + // contributes a bit. + + // We need at least a space. + if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE; + + // The smaller the edit distance, the higher the contribution. MAX_EXPECTED_DISTANCE means 0 + // contribution, while MIN_EXPECTED_DISTANCE means full contribution according to the + // weight of the distance. Clamp to avoid overflows. + const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE + : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance; + const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT + * (MAX_EXPECTED_DISTANCE - clampedDistance) + / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE); + // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no + // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the + // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp. + const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT + * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH); + // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no + // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the + // weight of the space count. 
+ const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT + * (spaceCount - MIN_EXPECTED_SPACE_COUNT) + / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT); + + return distanceContribution + lengthContribution + spaceContribution; } /** @@ -395,7 +445,7 @@ void Suggest::processTerminalDicNode( if (!dicNode->isTerminalWordNode()) { return; } - if (dicNode->shouldBeFilterdBySafetyNetForBigram()) { + if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { return; } // Create a non-cached node here. diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 0e8bd1195..b20343d29 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -58,7 +58,7 @@ class Suggest : public SuggestInterface { int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const; - int computeFirstWordConfidence() const; + int computeFirstWordConfidence(const DicNode *const terminalDicNode) const; void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 0d8c92768..a5a42ead1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -33,12 +33,16 @@ namespace latinime { +// Note that there are corresponding definitions on the Java side in BinaryDictionaryTests and +// BinaryDictionaryDecayingTests. 
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY = + "SET_NEEDS_TO_DECAY_FOR_TESTING"; const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; -const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60; +const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -301,7 +305,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { return; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } @@ -310,9 +314,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } + const bool runGCwithDecay = needsToDecay(); + DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer, + &mShortcutListPolicy, runGCwithDecay); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + &bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); + if (runGCwithDecay) { + mNeedsToDecayForTesting = false; + } } bool DynamicPatriciaTriePolicy::needsToRunGC(const 
bool mindsBlockByGC) const { @@ -340,8 +350,7 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) { // Bigram count exceeds the limit. return true; - } else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime() - + MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) { + } else if (mindsBlockByGC && needsToDecay()) { // Time to update probabilities for decaying. return true; } @@ -350,12 +359,19 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { } void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, - const int maxResultLength) const { + const int maxResultLength) { if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { snprintf(outResult, maxResultLength, "%d", mUnigramCount); } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { snprintf(outResult, maxResultLength, "%d", mBigramCount); + } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) { + mNeedsToDecayForTesting = true; } } +bool DynamicPatriciaTriePolicy::needsToDecay() const { + return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting + || mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0)); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index d3150c6fc..c3bbe9977 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -40,7 +40,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), - 
mBigramCount(mHeaderPolicy.getBigramCount()) {} + mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -95,16 +95,17 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool needsToRunGC(const bool mindsBlockByGC) const; void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const; + const int maxResultLength); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); - static const char*const UNIGRAM_COUNT_QUERY; - static const char*const BIGRAM_COUNT_QUERY; + static const char *const UNIGRAM_COUNT_QUERY; + static const char *const BIGRAM_COUNT_QUERY; + static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY; static const int MAX_DICT_EXTENDED_REGION_SIZE; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; - static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING; + static const int DECAY_INTERVAL_FOR_DECAYING_DICTS; const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; @@ -113,6 +114,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicBigramListPolicy mBigramListPolicy; int mUnigramCount; int mBigramCount; + int mNeedsToDecayForTesting; + + bool needsToDecay() const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 28124d251..db6d48fdb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -153,7 +153,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + 
mBuffer->getUsedAdditionalBufferSize(); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, - unigramCount, bigramCount, extendedRegionSize)) { + false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) { return; } DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); @@ -170,7 +170,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod } BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, - unigramCount, bigramCount, 0 /* extendedRegionSize */)) { + mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; } DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); @@ -488,12 +488,12 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - this, mBuffer, mIsDecayingDict); + this, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } - if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { // TODO: Remove more unigrams. 
} @@ -506,7 +506,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } - if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() + if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) { // TODO: Remove more bigrams. } @@ -525,7 +525,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instance for the GCed dictionary. DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy, - mIsDecayingDict); + mNeedsToDecay); // Create reading helper for the GCed dictionary. DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); @@ -544,7 +544,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, const int newProbability) { - if (mIsDecayingDict) { + if (mNeedsToDecay) { return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability); } else { return newProbability; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index ecee2cdbf..0caf29120 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -51,9 +51,9 @@ class DynamicPatriciaTrieWritingHelper { DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy *const bigramPolicy, - DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict) + DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) : 
mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), - mIsDecayingDict(isDecayingDict) {} + mNeedsToDecay(needsToDecay) {} ~DynamicPatriciaTrieWritingHelper() {} @@ -94,7 +94,7 @@ class DynamicPatriciaTrieWritingHelper { BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; - const bool mIsDecayingDict; + const bool mNeedsToDecay; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos, const int bigramLinkedNodePos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 9ce9994dd..eb072fbaf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -23,6 +23,7 @@ const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WOR // TODO: Change attribute string to "IS_DECAYING_DICT". 
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; +const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; @@ -63,8 +64,8 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const { } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount, - const int extendedRegionSize) const { + const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime, + const int unigramCount, const int bigramCount, const int extendedRegionSize) const { int writingPos = 0; if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion, &writingPos)) { @@ -90,6 +91,11 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY, time(0)); } + if (updatesLastDecayedTime) { + // Set current time as the last decayed time. 
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY, + time(0)); + } if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 4261667fa..a9c7805a8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -40,6 +40,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, @@ -58,6 +60,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} ~HeaderPolicy() {} @@ -90,6 +94,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mLastUpdatedTime; } + AK_FORCE_INLINE int getLastDecayedTime() const { + return mLastDecayedTime; + } + AK_FORCE_INLINE int getUnigramCount() const { return mUnigramCount; } @@ -106,8 +114,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { int *outValue, int outValueSize) const; bool 
writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime, const int unigramCount, - const int bigramCount, const int extendedRegionSize) const; + const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime, + const int unigramCount, const int bigramCount, const int extendedRegionSize) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); @@ -115,6 +123,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const IS_DECAYING_DICT_KEY; static const char *const LAST_UPDATED_TIME_KEY; + static const char *const LAST_DECAYED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; static const char *const EXTENDED_REGION_SIZE_KEY; @@ -128,6 +137,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const float mMultiWordCostMultiplier; const bool mIsDecayingDict; const int mLastUpdatedTime; + const int mLastDecayedTime; const int mUnigramCount; const int mBigramCount; const int mExtendedRegionSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 8d88c68e8..0f8662aea 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -114,7 +114,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } void getProperty(const char *const query, char *const outResult, - const int maxResultLength) const { + const int maxResultLength) { // getProperty is not supported for this class. 
if (maxResultLength > 0) { outResult[0] = '\0'; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index f22e94c6a..994826fa8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -44,7 +44,8 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, - 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); + true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */, + 0 /* extendedRegionSize */); BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 89e53f441..007c19e0a 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -101,7 +101,7 @@ class TypingTraversal : public Traversal { } const int16_t pointIndex = dicNode->getInputIndex(0); return pointIndex <= inputSize && !dicNode->isTotalInputSizeExceedingLimit() - && !dicNode->shouldBeFilterdBySafetyNetForBigram(); + && !dicNode->shouldBeFilteredBySafetyNetForBigram(); } AK_FORCE_INLINE bool shouldDepthLevelCache( diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index 2e735a81c..41663c81a 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ 
-75,6 +75,16 @@ class CharUtils { return c; } + static AK_FORCE_INLINE int getSpaceCount(const int *const codePointBuffer, const int length) { + int spaceCount = 0; + for (int i = 0; i < length; ++i) { + if (codePointBuffer[i] == KEYCODE_SPACE) { + ++spaceCount; + } + } + return spaceCount; + } + static unsigned short latin_tolower(const unsigned short c); private: |