diff options
Diffstat (limited to 'native/jni/src')
33 files changed, 1063 insertions, 313 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 89dfa39b3..c2aa8ba0e 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -375,7 +375,7 @@ typedef enum { CT_TERMINAL, CT_TERMINAL_INSERTION, // Create new word with space omission - CT_NEW_WORD_SPACE_OMITTION, + CT_NEW_WORD_SPACE_OMISSION, // Create new word with space substitution CT_NEW_WORD_SPACE_SUBSTITUTION, } CorrectionType; diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 41ef9d2b2..9099e8285 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -38,10 +38,10 @@ INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \ mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \ NELEMS(prevWordCharBuf)); \ - AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \ + AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \ getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ - getInputIndex(0)); \ + getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \ } while (0) #else #define LOGI_SHOW_ADD_COST_PROP @@ -434,6 +434,13 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getLanguageDistance(); } + // For space-aware gestures, we store the normalized distance at the char index + // that ends the first word of the suggestion. We call this the distance after + // first word. + float getNormalizedCompoundDistanceAfterFirstWord() const { + return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistanceAfterFirstWord(); + } + float getLanguageDistanceRatePerWordForScoring() const { const float langDist = getLanguageDistanceForScoring(); const float totalWordCount = @@ -565,6 +572,12 @@ class DicNode { inputSize, getTotalInputIndex(), errorType); } + // Saves the current normalized compound distance for space-aware gestures. + // See getNormalizedCompoundDistanceAfterFirstWord for details. + AK_FORCE_INLINE void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() { + mDicNodeState.mDicNodeStateScoring.saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet(); + } + // Caveat: Must not be called outside Weighting // This restriction is guaranteed by "friend" AK_FORCE_INLINE void forwardInputIndex(const int pointerId, const int count, diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index 4c884225a..3c85d0e9d 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -31,7 +31,8 @@ class DicNodeStateScoring { mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX), mEditCorrectionCount(0), mProximityCorrectionCount(0), mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f), - mRawLength(0.0f), mExactMatch(true) { + mRawLength(0.0f), mExactMatch(true), + mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) { } virtual ~DicNodeStateScoring() {} @@ -45,6 +46,7 @@ class DicNodeStateScoring { mRawLength = 0.0f; mDoubleLetterLevel = NOT_A_DOUBLE_LETTER; mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX; + mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING; mExactMatch = true; } @@ -58,6 +60,8 @@ class DicNodeStateScoring { mDoubleLetterLevel = scoring->mDoubleLetterLevel; mDigraphIndex = scoring->mDigraphIndex; mExactMatch = scoring->mExactMatch; + mNormalizedCompoundDistanceAfterFirstWord = + scoring->mNormalizedCompoundDistanceAfterFirstWord; } void addCost(const float spatialCost, const float languageCost, const bool doNormalization, @@ -86,6 +90,17 @@ class DicNodeStateScoring { } } + // Saves the current normalized distance for space-aware gestures. + // See getNormalizedCompoundDistanceAfterFirstWord for details. + void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() { + // We get called here after each word. We only want to store the distance after + // the first word, so if we already have a distance we skip saving -- hence "IfNoneYet" + // in the method name. + if (mNormalizedCompoundDistanceAfterFirstWord >= MAX_VALUE_FOR_WEIGHTING) { + mNormalizedCompoundDistanceAfterFirstWord = getNormalizedCompoundDistance(); + } + } + void addRawLength(const float rawLength) { mRawLength += rawLength; } @@ -102,6 +117,13 @@ class DicNodeStateScoring { return mNormalizedCompoundDistance; } + // For space-aware gestures, we store the normalized distance at the char index + // that ends the first word of the suggestion. We call this the distance after + // first word. + float getNormalizedCompoundDistanceAfterFirstWord() const { + return mNormalizedCompoundDistanceAfterFirstWord; + } + float getSpatialDistance() const { return mSpatialDistance; } @@ -178,6 +200,7 @@ class DicNodeStateScoring { float mLanguageDistance; float mRawLength; bool mExactMatch; + float mNormalizedCompoundDistanceAfterFirstWord; AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance, bool doNormalization, int inputSize, int totalInputIndex) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index ec1b63a12..b1d01ed86 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -33,6 +33,8 @@ namespace latinime { +const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; + Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy), @@ -121,32 +123,37 @@ void Dictionary::flushWithGC(const char *const filePath) { mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); } -bool Dictionary::needsToRunGC() { - return mDictionaryStructureWithBufferPolicy->needsToRunGC(); +bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { + return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); +} + +void Dictionary::getProperty(const char *const query, char *const outResult, + const int maxResultLength) const { + return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength); } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { - const int BUFFER_SIZE = 16; - int dictionaryIdCodePointBuffer[BUFFER_SIZE]; - int versionStringCodePointBuffer[BUFFER_SIZE]; - int dateStringCodePointBuffer[BUFFER_SIZE]; + int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; const DictionaryHeaderStructurePolicy *const headerPolicy = getDictionaryStructurePolicy()->getHeaderStructurePolicy(); headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, - BUFFER_SIZE); + HEADER_ATTRIBUTE_BUFFER_SIZE); headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, - BUFFER_SIZE); - headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE); - - char dictionaryIdCharBuffer[BUFFER_SIZE]; - char versionStringCharBuffer[BUFFER_SIZE]; - char dateStringCharBuffer[BUFFER_SIZE]; - intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE, - dictionaryIdCharBuffer, BUFFER_SIZE); - intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE, - versionStringCharBuffer, BUFFER_SIZE); - intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE, - dateStringCharBuffer, BUFFER_SIZE); + HEADER_ATTRIBUTE_BUFFER_SIZE); + headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, + HEADER_ATTRIBUTE_BUFFER_SIZE); + + char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; + intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, + dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); + intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, + versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); + intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, + dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); LogUtils::logToJava(env, "Dictionary info: dictionary = %s ; version = %s ; date = %s", diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 974447468..d8a0f3e58 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -81,7 +81,10 @@ class Dictionary { void flushWithGC(const char *const filePath); - bool needsToRunGC(); + bool needsToRunGC(const bool mindsBlockByGC); + + void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const; const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy; @@ -92,6 +95,8 @@ class Dictionary { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); + static const int HEADER_ATTRIBUTE_BUFFER_SIZE; + DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; const BigramDictionary *const mBigramDictionary; const SuggestInterface *const mGestureSuggest; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b95488ebd..c7ffef0d5 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -78,7 +78,10 @@ class DictionaryStructureWithBufferPolicy { virtual void flushWithGC(const char *const filePath) = 0; - virtual bool needsToRunGC() const = 0; + virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0; + + virtual void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const = 0; protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index f9b777df2..0c4016893 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -38,7 +38,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_SUBSTITUTION: PROF_SUBSTITUTION(node->mProfiler); return; - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: PROF_NEW_WORD(node->mProfiler); return; case CT_MATCH: @@ -93,6 +93,11 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n } dicNode->addCost(spatialCost, languageCost, weighting->needsToNormalizeCompoundDistance(), inputSize, errorType); + if (CT_NEW_WORD_SPACE_OMISSION == correctionType) { + // When we are on a terminal, we save the current distance for evaluating + // when to auto-commit partial suggestions. + dicNode->saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet(); + } } /* static */ float Weighting::getSpatialCost(const Weighting *const weighting, @@ -108,7 +113,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_SUBSTITUTION: // only used for typing return weighting->getSubstitutionCost(); - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG); case CT_MATCH: return weighting->getMatchedCost(traverseSession, dicNode, inputStateG); @@ -138,7 +143,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n return 0.0f; case CT_SUBSTITUTION: return 0.0f; - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: return weighting->getNewWordBigramLanguageCost( traverseSession, parentDicNode, multiBigramMap); case CT_MATCH: @@ -173,7 +178,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n return 0; /* 0 because CT_MATCH will be called */ case CT_SUBSTITUTION: return 0; /* 0 because CT_MATCH will be called */ - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: return 0; case CT_MATCH: return 1; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index b1340e12f..e20bc497a 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -574,7 +574,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode DicNodeUtils::initAsRootWithPreviousWord( traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); const CorrectionType correctionType = spaceSubstitution ? - CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION; + CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION; Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, &newDicNode, traverseSession->getMultiBigramMap()); if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index bc2f5ee58..67a085de3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -17,10 +17,10 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" namespace latinime { @@ -41,9 +41,14 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { originalBigramPos += mBuffer->getOriginalBufferSize(); } - *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); + if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) { + // This bigram is too weak to output. + *outBigramPos = NOT_A_DICT_POS; + } else { + *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + } if (usesAdditionalBuffer) { *bigramEntryPos += mBuffer->getOriginalBufferSize(); } @@ -119,7 +124,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b // Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode // has been deleted or is not a valid terminal. bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( - int *const bigramListPos) { + int *const bigramListPos, int *const outValidBigramEntryCount) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); if (usesAdditionalBuffer) { *bigramListPos -= mBuffer->getOriginalBufferSize(); @@ -153,14 +158,22 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( const int bigramTargetNodePos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); - // TODO: Update probability for supporting probability decaying. if (nodeReader.isDeleted() || !nodeReader.isTerminal() || bigramTargetNodePos == NOT_A_DICT_POS) { // The target is no longer valid terminal. Invalidate the current bigram entry. if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, - NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) { + NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) { return false; } + continue; + } + bool isRemoved = false; + if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos, + &isRemoved)) { + return false; + } + if (!isRemoved) { + (*outValidBigramEntryCount) += 1; } } while(BigramListReadWriteUtils::hasNext(bigramFlags)); return true; @@ -169,7 +182,7 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( // Updates bigram target PtNode positions in the list after the placing step in GC. bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos, const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const - ptNodePositionRelocationMap) { + ptNodePositionRelocationMap, int *const outBigramEntryCount) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); if (usesAdditionalBuffer) { *bigramListPos -= mBuffer->getOriginalBufferSize(); @@ -211,11 +224,12 @@ bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bi return false; } } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + (*outBigramEntryCount) = bigramEntryCount; return true; } bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos, - const int probability, int *const bigramListPos) { + const int probability, int *const bigramListPos, bool *const outAddedNewBigram) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); if (usesAdditionalBuffer) { *bigramListPos -= mBuffer->getOriginalBufferSize(); @@ -243,8 +257,15 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg } if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) { // Update this bigram entry. + *outAddedNewBigram = false; + const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( + bigramFlags); + const int probabilityToWrite = mIsDecayingDict ? + DecayingUtils::getUpdatedBigramProbabilityDelta( + originalProbability, probability) : probability; const BigramListReadWriteUtils::BigramFlags updatedFlags = - BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability); + BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, + probabilityToWrite); return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos, &entryPos); } @@ -254,12 +275,14 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg // The current last entry is found. // First, update the flags of the last entry. if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) { + *outAddedNewBigram = false; return false; } if (usesAdditionalBuffer) { *bigramListPos += mBuffer->getOriginalBufferSize(); } // Then, add a new entry after the last entry. + *outAddedNewBigram = true; return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos); } while(BigramListReadWriteUtils::hasNext(bigramFlags)); // We return directly from the while loop. @@ -270,8 +293,11 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability, int *const writingPos) { // hasNext is false because we are adding a new bigram entry at the end of the bigram list. + const int probabilityToWrite = mIsDecayingDict ? + DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : + probability; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, - probability, false /* hasNext */, writingPos); + probabilityToWrite, false /* hasNext */, writingPos); } bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) { @@ -325,7 +351,7 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); bigramLinkCount++; if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { - AKLOGE("Bigram link is invalid. start position: %d", bigramPos); + AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); ASSERT(false); return NOT_A_DICT_POS; } @@ -333,4 +359,33 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( return currentPos; } +bool DynamicBigramListPolicy::updateProbabilityForDecay( + BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, + int *const bigramEntryPos, bool *const outRemoved) const { + *outRemoved = false; + if (mIsDecayingDict) { + // Update bigram probability for decaying. + const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave( + BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); + if (DecayingUtils::isValidBigram(newProbability)) { + // Write new probability. + const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = + BigramListReadWriteUtils::setProbabilityInFlags( + bigramFlags, newProbability); + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags, + targetPtNodePos, bigramEntryPos)) { + return false; + } + } else { + // Remove current bigram entry. + *outRemoved = true; + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) { + return false; + } + } + } + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 8ea318a41..b358b4ed5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" namespace latinime { @@ -34,8 +35,9 @@ class DictionaryShortcutsStructurePolicy; class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { public: DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, - const DictionaryShortcutsStructurePolicy *const shortcutPolicy) - : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {} + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const bool isDecayingDict) + : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {} ~DynamicBigramListPolicy() {} @@ -50,19 +52,20 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos, int *const toPos, int *const outBigramsCount) const; - bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos); + bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos, + int *const outBigramEntryCount); bool updateAllBigramTargetPtNodePositions(int *const bigramListPos, const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const - ptNodePositionRelocationMap); + ptNodePositionRelocationMap, int *const outValidBigramEntryCount); bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, - int *const bigramListPos); + int *const bigramListPos, bool *const outAddedNewBigram); bool writeNewBigramEntry(const int bigramTargetPos, const int probability, int *const writingPos); - // Return if targetBigramPos is found or not. + // Return whether or not targetBigramPos is found. bool removeBigram(const int bigramListPos, const int bigramTargetPos); private: @@ -73,9 +76,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { BufferWithExtendableBuffer *const mBuffer; const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; + const bool mIsDecayingDict; // Follow bigram link and return the position of bigram target PtNode that is currently valid. int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; + + bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags, + const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index c60e45819..081163a4d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" + namespace latinime { bool DynamicPatriciaTrieGcEventListeners @@ -25,6 +27,19 @@ bool DynamicPatriciaTrieGcEventListeners // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. bool isUselessPtNode = !node->isTerminal(); + if (node->isTerminal() && mIsDecayingDict) { + const int newProbability = + DecayingUtils::getUnigramProbabilityToSave(node->getProbability()); + int writingPos = node->getProbabilityFieldPos(); + // Update probability. + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( + mBuffer, newProbability, &writingPos)) { + return false; + } + if (!DecayingUtils::isValidUnigram(newProbability)) { + isUselessPtNode = false; + } + } if (mChildrenValue > 0) { isUselessPtNode = false; } else if (node->isTerminal()) { @@ -41,7 +56,27 @@ bool DynamicPatriciaTrieGcEventListeners return false; } } else { - valueStack.back() += 1; + mValueStack.back() += 1; + if (node->isTerminal()) { + mValidUnigramCount += 1; + } + } + return true; +} + +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + if (!node->isDeleted()) { + int pos = node->getBigramsPos(); + if (pos != NOT_A_DICT_POS) { + int bigramEntryCount = 0; + if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, + &bigramEntryCount)) { + return false; + } + mValidBigramEntryCount += bigramEntryCount; + } } return true; } @@ -137,10 +172,15 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField // Updates bigram target PtNode positions in the bigram list. int bigramsPos = node->getBigramsPos(); if (bigramsPos != NOT_A_DICT_POS) { + int bigramEntryCount; if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, - &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) { + &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) { return false; } + mBigramCount += bigramEntryCount; + } + if (node->isTerminal()) { + mUnigramCount++; } return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index 4256f22fb..463715af5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -39,23 +39,23 @@ class DynamicPatriciaTrieGcEventListeners { public: TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( DynamicPatriciaTrieWritingHelper *const writingHelper, - BufferWithExtendableBuffer *const buffer) - : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), - mChildrenValue(0) {} + BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) + : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict), + mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; bool onAscend() { - if (valueStack.empty()) { + if (mValueStack.empty()) { return false; } - mChildrenValue = valueStack.back(); - valueStack.pop_back(); + mChildrenValue = mValueStack.back(); + mValueStack.pop_back(); return true; } bool onDescend(const int ptNodeArrayPos) { - valueStack.push_back(0); + mValueStack.push_back(0); return true; } @@ -64,14 +64,20 @@ class DynamicPatriciaTrieGcEventListeners { bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints); + int getValidUnigramCount() const { + return mValidUnigramCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; - std::vector<int> valueStack; + const int mIsDecayingDict; + std::vector<int> mValueStack; int mChildrenValue; + int mValidUnigramCount; }; // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram @@ -80,7 +86,7 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy) - : mBigramPolicy(bigramPolicy) {} + : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {} bool onAscend() { return true; } @@ -89,22 +95,17 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { - if (!node->isDeleted()) { - int pos = node->getBigramsPos(); - if (pos != NOT_A_DICT_POS) { - if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) { - return false; - } - } - } - return true; + const int *const nodeCodePoints); + + int getValidBigramEntryCount() const { + return mValidBigramEntryCount; } private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability); DynamicBigramListPolicy *const mBigramPolicy; + int mValidBigramEntryCount; }; class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer @@ -150,7 +151,8 @@ class DynamicPatriciaTrieGcEventListeners { dictPositionRelocationMap) : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), mBufferToWrite(bufferToWrite), - mDictPositionRelocationMap(dictPositionRelocationMap) {}; + mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0), + mBigramCount(0) {}; bool onAscend() { return true; } @@ -161,6 +163,14 @@ class DynamicPatriciaTrieGcEventListeners { bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints); + int getUnigramCount() const { + return mUnigramCount; + } + + int getBigramCount() const { + return mBigramCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); @@ -169,6 +179,8 @@ class DynamicPatriciaTrieGcEventListeners { BufferWithExtendableBuffer *const mBufferToWrite; const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const mDictPositionRelocationMap; + int mUnigramCount; + int mBigramCount; }; private: diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 42397c19e..0d8c92768 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -16,6 +16,10 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include <cstdio> +#include <cstring> +#include <ctime> + #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" @@ -24,10 +28,18 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { +const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; +const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; +const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = + DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; +const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60; + void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { @@ -137,14 +149,17 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { - // TODO: check mHeaderPolicy.usesForgettingCurve(); - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return ProbabilityUtils::backoff(unigramProbability); + if (mHeaderPolicy.isDecayingDict()) { + return DecayingUtils::getProbability(unigramProbability, bigramProbability); } else { - return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, - bigramProbability); + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } } } @@ -193,12 +208,26 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); - return writingHelper.addUnigramWord(&readingHelper, word, length, probability); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + bool addedNewUnigram = false; + if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, + &addedNewUnigram)) { + if (addedNewUnigram) { + mUnigramCount++; + } + return true; + } else { + return false; + } } bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, @@ -207,6 +236,11 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { @@ -218,8 +252,16 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); - return writingHelper.addBigramWords(word0Pos, word1Pos, probability); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + bool addedNewBigram = false; + if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } } bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, @@ -228,6 +270,11 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); return false; } + if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } const int word0Pos = getTerminalNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { @@ -239,8 +286,13 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const return false; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); - return writingHelper.removeBigramWords(word0Pos, word1Pos); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } } void DynamicPatriciaTriePolicy::flush(const char *const filePath) { @@ -249,8 +301,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { return; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); - writingHelper.writeToDictFile(filePath, &mHeaderPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { @@ -259,17 +311,51 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { return; } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy); + &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); } -bool DynamicPatriciaTriePolicy::needsToRunGC() const { +bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { if (!mBuffer->isUpdatable()) { AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; } - // TODO: Implement more properly. - return mBufferWithExtendableBuffer.isNearSizeLimit(); + if (mBufferWithExtendableBuffer.isNearSizeLimit()) { + // Additional buffer size is near the limit. + return true; + } else if (mHeaderPolicy.getExtendedRegionSize() + + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() + > MAX_DICT_EXTENDED_REGION_SIZE) { + // Total extended region size exceeds the limit. + return true; + } else if (mBufferWithExtendableBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS + && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) { + // Needs to reduce dictionary size. + return true; + } else if (mHeaderPolicy.isDecayingDict()) { + if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) { + // Unigram count exceeds the limit. + return true; + } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) { + // Bigram count exceeds the limit. + return true; + } else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime() + + MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) { + // Time to update probabilities for decaying. + return true; + } + } + return false; +} + +void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult, + const int maxResultLength) const { + if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mUnigramCount); + } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mBigramCount); + } } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 06d8095d8..d3150c6fc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -37,7 +37,10 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mShortcutListPolicy(&mBufferWithExtendableBuffer), - mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {} + mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy, + mHeaderPolicy.isDecayingDict()), + mUnigramCount(mHeaderPolicy.getUnigramCount()), + mBigramCount(mHeaderPolicy.getBigramCount()) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -89,16 +92,27 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { void flushWithGC(const char *const filePath); - bool needsToRunGC() const; + bool needsToRunGC(const bool mindsBlockByGC) const; + + void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); + static const char*const UNIGRAM_COUNT_QUERY; + static const char*const BIGRAM_COUNT_QUERY; + static const int MAX_DICT_EXTENDED_REGION_SIZE; + static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING; + const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mBufferWithExtendableBuffer; DynamicShortcutListPolicy mShortcutListPolicy; DynamicBigramListPolicy mBigramListPolicy; + int mUnigramCount; + int mBigramCount; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index 154590fbd..512a4d818 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -264,7 +264,7 @@ class DynamicPatriciaTrieReadingHelper { AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { - AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE); + AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index a51ae5e1d..28124d251 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -16,9 +16,6 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include <cstdio> -#include <cstring> - #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" @@ -28,19 +25,20 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/hash_map_compat.h" namespace latinime { const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; -const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = - ".tmp"; // TODO: Make MAX_DICTIONARY_SIZE 8MB. const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability) { + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); @@ -58,8 +56,11 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( const int nextIndex = matchedCodePointCount + j; if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, wordCodePoints[matchedCodePointCount + j])) { + *outAddedNewUnigram = true; return reallocatePtNodeAndAddNewPtNodes(nodeReader, - readingHelper->getMergedNodeCodePoints(), j, probability, + readingHelper->getMergedNodeCodePoints(), j, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, + probability), wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } @@ -67,10 +68,12 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount()) { return setPtNodeProbability(nodeReader, probability, - readingHelper->getMergedNodeCodePoints()); + readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram); } if (!nodeReader->hasChildren()) { - return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, probability, + *outAddedNewUnigram = true; + return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), wordCodePoints + readingHelper->getTotalCodePointCount(), codePointCount - readingHelper->getTotalCodePointCount()); } @@ -83,14 +86,15 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( return false; } int pos = readingHelper->getPosOfLastForwardLinkField(); + *outAddedNewUnigram = true; return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), - probability, &pos); + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); } bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, - const int probability) { + const int probability, bool *const outAddedNewBigram) { int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, @@ -111,9 +115,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { // Insert a new bigram entry into the existing bigram list. int bigramListPos = nodeReader.getBigramsPos(); - return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos); + return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, + outAddedNewBigram); } else { // The PtNode doesn't have a bigram list. + *outAddedNewBigram = true; // First, Write a bigram entry at the tail position of the PtNode. if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { return false; @@ -142,26 +148,32 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con } void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, - const HeaderPolicy *const headerPolicy) { + const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); - if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { + const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + + mBuffer->getUsedAdditionalBufferSize(); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, + unigramCount, bigramCount, extendedRegionSize)) { return; } - flushAllToFile(fileName, &headerBuffer, mBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); } void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); - if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) { - return; - } BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, MAX_DICTIONARY_SIZE); - if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { + int unigramCount = 0; + int bigramCount = 0; + if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) { + return; + } + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, + unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; } - flushAllToFile(fileName, &headerBuffer, &newDictBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); } bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( @@ -339,23 +351,28 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, - const int *const codePoints) { + const int *const codePoints, bool *const outAddedNewUnigram) { if (originalPtNode->isTerminal()) { // Overwrites the probability. + *outAddedNewUnigram = false; + const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), + probability); int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, - probability, &probabilityFieldPos)) { + probabilityToWrite, &probabilityFieldPos)) { return false; } } else { // Make the node terminal and write the probability. + *outAddedNewUnigram = true; int movedPos = mBuffer->getTailPosition(); if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { return false; } if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), - probability, &movedPos)) { + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), + &movedPos)) { return false; } } @@ -463,72 +480,23 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return true; } -// TODO: Create a struct which contains header, body and etc... and use here as an argument. -void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const { - const int tmpFileNameBufSize = strlen(fileName) - + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; - // Name of a temporary file used for writing that is a connected string of original name and - // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. - char tmpFileName[tmpFileNameBufSize]; - snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName, - TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); - FILE *const file = fopen(tmpFileName, "wb"); - if (!file) { - AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName); - ASSERT(false); - return; - } - // Write the dictionary header. - if (!writeBufferToFilePointer(file, dictHeader)) { - remove(tmpFileName); - AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); - ASSERT(false); - return; - } - // Write the dictionary body. - if (!writeBufferToFilePointer(file, dictBody)) { - remove(tmpFileName); - AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); - ASSERT(false); - return; - } - fclose(file); - rename(tmpFileName, fileName); -} - -// This closes file pointer when an error is caused and returns whether the writing was succeeded -// or not. -bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const { - const int originalBufSize = buffer->getOriginalBufferSize(); - if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), - originalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); - if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), - additionalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - return true; -} - bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, - BufferWithExtendableBuffer *const bufferToWrite) { + BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, + int *const outBigramCount) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - this, mBuffer); + this, mBuffer, mIsDecayingDict); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } + if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more unigrams. + } readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability @@ -538,6 +506,11 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } + if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() + > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more bigrams. + } + // Mapping from positions in mBuffer to positions in bufferToWrite. DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); @@ -551,7 +524,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instance for the GCed dictionary. DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); - DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy); + DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy, + mIsDecayingDict); // Create reading helper for the GCed dictionary. DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); @@ -563,7 +537,18 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, &traversePolicyToUpdateAllPositionFields)) { return false; } + *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); + *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount(); return true; } +int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, + const int newProbability) { + if (mIsDecayingDict) { + return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability); + } else { + return newProbability; + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 028fa6075..ecee2cdbf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -17,7 +17,6 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H -#include <cstdio> #include <stdint.h> #include "defines.h" @@ -48,24 +47,30 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); }; + static const size_t MAX_DICTIONARY_SIZE; + DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy *const bigramPolicy, - DynamicShortcutListPolicy *const shortcutPolicy) - : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict) + : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + mIsDecayingDict(isDecayingDict) {} ~DynamicPatriciaTrieWritingHelper() {} // Add a word to the dictionary. If the word already exists, update the probability. bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability); + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram); // Add a bigram relation from word0Pos to word1Pos. - bool addBigramWords(const int word0Pos, const int word1Pos, const int probability); + bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, + bool *const outAddedNewBigram); // Remove a bigram relation from word0Pos to word1Pos. bool removeBigramWords(const int word0Pos, const int word1Pos); - void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy); + void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, + const int unigramCount, const int bigramCount); void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy); @@ -85,12 +90,11 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; - static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; - static const size_t MAX_DICTIONARY_SIZE; BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; + const bool mIsDecayingDict; bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos, const int bigramLinkedNodePos); @@ -109,7 +113,7 @@ class DynamicPatriciaTrieWritingHelper { const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, - const int probability, const int *const codePoints); + const int probability, const int *const codePoints, bool *const outAddedNewUnigram); bool createChildrenPtNodeArrayAndAChildPtNode( const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, @@ -124,14 +128,10 @@ class DynamicPatriciaTrieWritingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - void flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const; - - bool writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const; + bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite, + int *const outUnigramCount, int *const outBigramCount); - bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite); + int getUpdatedProbability(const int originalProbability, const int newProbability); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index 5a3983776..30ff10cd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -36,6 +36,16 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000; const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1; const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; +/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( + BufferWithExtendableBuffer *const buffer, const int rootPos) { + int writingPos = rootPos; + if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) { + return false; + } + return writeForwardLinkPositionAndAdvancePosition(buffer, NOT_A_DICT_POS /* forwardLinkPos */, + &writingPos); +} + /* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index a37e9fb3d..af76bc6b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -30,6 +30,8 @@ class DynamicPatriciaTrieWritingUtils { public: static const int NODE_FLAG_FIELD_SIZE; + static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos); + static bool writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 47ace23a1..9ce9994dd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -16,16 +16,17 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include <cstddef> -#include <cstdio> -#include <ctime> - namespace latinime { +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; -const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; +// TODO: Change attribute string to "IS_DECAYING_DICT". +const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; -const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; +const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; +const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; +const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; +const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; // Used for logging. Question mark is used to indicate that the key is not found. @@ -37,7 +38,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out return; } std::vector<int> keyCodePointVector; - insertCharactersIntoVector(key, &keyCodePointVector); + HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. @@ -53,51 +54,17 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - int attributeValue = 0; - if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { - if (attributeValue <= 0) { - return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); - } - return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue); - } else { - return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; - } -} - -bool HeaderPolicy::readUsesForgettingCurveFlag() const { - int attributeValue = 0; - if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) { - return attributeValue != 0; - } else { - return false; - } -} - -// Returns S_INT_MIN when the key is not found or the value is invalid. -int HeaderPolicy::readLastUpdatedTime() const { - int attributeValue = 0; - if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) { - return attributeValue; - } else { - return S_INT_MIN; - } -} - -// Returns whether the key is found or not and stores the found value into outValue. -bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { - std::vector<int> keyVector; - insertCharactersIntoVector(key, &keyVector); - HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); - if (it == mAttributeMap.end()) { - // The key was not found. - return false; + const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); + if (demotionRate <= 0) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - *outValue = parseIntAttributeValue(&(it->second)); - return true; + return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime) const { + const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount, + const int extendedRegionSize) const { int writingPos = 0; if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion, &writingPos)) { @@ -113,26 +80,19 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT &writingPos)) { return false; } + HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, UNIGRAM_COUNT_KEY, unigramCount); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, BIGRAM_COUNT_KEY, bigramCount); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, EXTENDED_REGION_SIZE_KEY, + extendedRegionSize); if (updatesLastUpdatedTime) { // Set current time as a last updated time. - HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); - std::vector<int> updatedTimekey; - insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); - const time_t currentTime = time(NULL); - std::vector<int> updatedTimeValue; - char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; - snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); - insertCharactersIntoVector(charBuf, &updatedTimeValue); - attributeMapTowrite[updatedTimekey] = updatedTimeValue; - if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, - &writingPos)) { - return false; - } - } else { - if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap, - &writingPos)) { - return false; - } + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY, + time(0)); + } + if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, + &writingPos)) { + return false; } // Writes an actual header size. if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos, @@ -149,30 +109,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT return attributeMap; } -/* static */ int HeaderPolicy::parseIntAttributeValue( - const std::vector<int> *const attributeValue) { - int value = 0; - bool isNegative = false; - for (size_t i = 0; i < attributeValue->size(); ++i) { - if (i == 0 && attributeValue->at(i) == '-') { - isNegative = true; - } else { - if (!isdigit(attributeValue->at(i))) { - // If not a number, return S_INT_MIN - return S_INT_MIN; - } - value *= 10; - value += attributeValue->at(i) - '0'; - } - } - return isNegative ? -value : value; -} - -/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, - std::vector<int> *const vector) { - for (int i = 0; characters[i]; ++i) { - vector->push_back(characters[i]); - } -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 6b396f3f2..4261667fa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,7 +17,7 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include <cctype> +#include <ctime> #include <stdint.h> #include "defines.h" @@ -29,15 +29,36 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictBuf(dictBuf), - mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + // Reads information from existing dictionary buffer. + HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), - mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), + mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), - mUsesForgettingCurve(readUsesForgettingCurveFlag()), - mLastUpdatedTime(readLastUpdatedTime()) {} + mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, + IS_DECAYING_DICT_KEY, false /* defaultValue */)), + mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), + mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + BIGRAM_COUNT_KEY, 0 /* defaultValue */)), + mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {} + + // Constructs header information using an attribute map. + HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) + : mDictFormatVersion(dictFormatVersion), + mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + attributeMap)), mSize(0), mAttributeMap(*attributeMap), + mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), + mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, + IS_DECAYING_DICT_KEY, false /* defaultValue */)), + mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} ~HeaderPolicy() {} @@ -61,53 +82,60 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMultiWordCostMultiplier; } - AK_FORCE_INLINE bool usesForgettingCurve() const { - return mUsesForgettingCurve; + AK_FORCE_INLINE bool isDecayingDict() const { + return mIsDecayingDict; } AK_FORCE_INLINE int getLastUpdatedTime() const { return mLastUpdatedTime; } + AK_FORCE_INLINE int getUnigramCount() const { + return mUnigramCount; + } + + AK_FORCE_INLINE int getBigramCount() const { + return mBigramCount; + } + + AK_FORCE_INLINE int getExtendedRegionSize() const { + return mExtendedRegionSize; + } + void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool updatesLastUpdatedTime) const; + const bool updatesLastUpdatedTime, const int unigramCount, + const int bigramCount, const int extendedRegionSize) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; - static const char *const USES_FORGETTING_CURVE_KEY; + static const char *const IS_DECAYING_DICT_KEY; static const char *const LAST_UPDATED_TIME_KEY; - static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + static const char *const UNIGRAM_COUNT_KEY; + static const char *const BIGRAM_COUNT_KEY; + static const char *const EXTENDED_REGION_SIZE_KEY; + static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - const uint8_t *const mDictBuf; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; HeaderReadWriteUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; - const bool mUsesForgettingCurve; + const bool mIsDecayingDict; const int mLastUpdatedTime; + const int mUnigramCount; + const int mBigramCount; + const int mExtendedRegionSize; float readMultipleWordCostMultiplier() const; - bool readUsesForgettingCurveFlag() const; - - int readLastUpdatedTime() const; - - bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); - - static int parseIntAttributeValue(const std::vector<int> *const attributeValue); - - static void insertCharactersIntoVector( - const char *const characters, std::vector<int> *const vector); }; } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 80fe88671..2694ce8d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include <cctype> +#include <cstdio> #include <vector> #include "defines.h" @@ -43,6 +45,13 @@ const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. +const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; +const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = + "REQUIRES_GERMAN_UMLAUT_PROCESSING"; +const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY = + "REQUIRES_FRENCH_LIGATURE_PROCESSING"; + /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() @@ -56,6 +65,22 @@ const HeaderReadWriteUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, + REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false /* defaultValue */); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, + REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, false /* defaultValue */); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, + SUPPORTS_DYNAMIC_UPDATE_KEY, false /* defaultValue */); + DictionaryFlags dictflags = NO_FLAGS; + dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; + dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; + dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0; + return dictflags; +} + /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const int headerSize = getHeaderSize(dictBuf); @@ -128,4 +153,72 @@ const HeaderReadWriteUtils::DictionaryFlags return true; } +/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, + const char *const key, const bool value) { + setIntAttribute(headerAttributes, key, value ? 1 : 0); +} + +/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, + const char *const key, const int value) { + AttributeMap::key_type keyVector; + insertCharactersIntoVector(key, &keyVector); + setIntAttributeInner(headerAttributes, &keyVector, value); +} + +/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value) { + AttributeMap::mapped_type valueVector; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value); + insertCharactersIntoVector(charBuf, &valueVector); + (*headerAttributes)[*key] = valueVector; +} + +/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( + const AttributeMap *const headerAttributes, const char *const key, + const bool defaultValue) { + const int intDefaultValue = defaultValue ? 1 : 0; + const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); + return intValue != 0; +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValue( + const AttributeMap *const headerAttributes, const char *const key, + const int defaultValue) { + AttributeMap::key_type keyVector; + insertCharactersIntoVector(key, &keyVector); + return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue); +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValueInner( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const int defaultValue) { + AttributeMap::const_iterator it = headerAttributes->find(*key); + if (it != headerAttributes->end()) { + int value = 0; + bool isNegative = false; + for (size_t i = 0; i < it->second.size(); ++i) { + if (i == 0 && it->second.at(i) == '-') { + isNegative = true; + } else { + if (!isdigit(it->second.at(i))) { + // If not a number. + return defaultValue; + } + value *= 10; + value += it->second.at(i) - '0'; + } + } + return isNegative ? -value : value; + } + return defaultValue; +} + +/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters, + std::vector<int> *const vector) { + for (int i = 0; characters[i]; ++i) { + vector->push_back(characters[i]); + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 6cce73375..225968323 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -54,6 +54,9 @@ class HeaderReadWriteUtils { + HEADER_SIZE_FIELD_SIZE; } + static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); @@ -69,6 +72,24 @@ class HeaderReadWriteUtils { static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, int *const writingPos); + /** + * Methods for header attributes. + */ + static void setBoolAttribute(AttributeMap *const headerAttributes, + const char *const key, const bool value); + + static void setIntAttribute(AttributeMap *const headerAttributes, + const char *const key, const int value); + + static bool readBoolAttributeValue(const AttributeMap *const headerAttributes, + const char *const key, const bool defaultValue); + + static int readIntAttributeValue(const AttributeMap *const headerAttributes, + const char *const key, const int defaultValue); + + static void insertCharactersIntoVector(const char *const characters, + AttributeMap::key_type *const key); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); @@ -87,7 +108,16 @@ class HeaderReadWriteUtils { static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; - static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; + static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; + + static void setIntAttributeInner(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value); + + static int readIntAttributeValueInner(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int defaultValue); }; } #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 5269795a4..8a84bd261 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -31,9 +31,21 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); + if (nextPos < 0 || nextPos >= mDictBufferSize) { + AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", + nextPos, mDictBufferSize); + ASSERT(false); + return; + } const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { + if (nextPos < 0 || nextPos >= mDictBufferSize) { + AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", + nextPos, mDictBufferSize, i, childCount); + ASSERT(false); + return; + } nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); } } @@ -404,6 +416,11 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod if (PatriciaTrieReadingUtils::hasBigrams(flags)) { getBigramsStructurePolicy()->skipAllBigrams(&pos); } + if (mergedNodeCodePointCount <= 0) { + AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); + ASSERT(false); + return pos; + } childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 19155f938..8d88c68e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -36,6 +36,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { PatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), + mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~PatriciaTriePolicy() { @@ -106,18 +107,27 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); } - bool needsToRunGC() const { + bool needsToRunGC(const bool mindsBlockByGC) const { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; } + void getProperty(const char *const query, char *const outResult, + const int maxResultLength) const { + // getProperty is not supported for this class. + if (maxResultLength > 0) { + outResult[0] = '\0'; + } + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; + const int mDictBufferSize; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 1316b425f..7df55815f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -71,8 +71,17 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, pos); } else { - if (maxLength > 0) { - outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + const int codePoint = getCodePointAndAdvancePosition(buffer, pos); + if (codePoint == NOT_A_CODE_POINT) { + // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is + // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR + // when the PtNode has a single code point. + length = 0; + AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", + *pos - 1, codePoint, buffer[*pos - 1]); + ASSERT(false); + } else if (maxLength > 0) { + outBuffer[0] = codePoint; length = 1; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 17d2e39c2..9dc34823c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -42,6 +42,10 @@ class BufferWithExtendableBuffer { return mOriginalBufferSize + mUsedAdditionalBufferSize; } + AK_FORCE_INLINE int getUsedAdditionalBufferSize() const { + return mUsedAdditionalBufferSize; + } + /** * For reading. */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp new file mode 100644 index 000000000..942a74238 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" + +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + +namespace latinime { + +const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000; +const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; +const int DecayingUtils::MAX_BIGRAM_COUNT = 12000; +const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; + +const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127; +const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120; +const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; +const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8; +const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; +const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; +const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; + +/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability, + const int encodedBigramProbabilityDelta) { + if (encodedUnigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) { + const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability( + encodedUnigramProbability)); + return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); + } else { + const int rawProbability = ProbabilityUtils::computeProbabilityForBigram( + decodeUnigramProbability(encodedUnigramProbability), + decodeBigramProbabilityDelta(encodedBigramProbabilityDelta)); + return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); + } +} + +/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability, + const int newProbability) { + if (originalEncodedProbability == NOT_A_PROBABILITY) { + // The unigram is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The unigram is not in other dictionaries. + return 0; + } else { + return MIN_VALID_UNIGRAM_PROBABILITY; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) { + return MIN_VALID_UNIGRAM_PROBABILITY; + } + return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY); + } +} + +/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) { + return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); +} + +/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) { + return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); +} + +/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta( + const int originalEncodedProbabilityDelta, const int newProbability) { + if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { + // The bigram relation is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The bigram target is not in other dictionaries. + return 0; + } else { + return MIN_VALID_BIGRAM_PROBABILITY_DELTA; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) { + return MIN_VALID_BIGRAM_PROBABILITY_DELTA; + } + return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP, + MAX_BIGRAM_PROBABILITY_DELTA); + } +} + +/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) { + return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; +} + +/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) { + return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; +} + +/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) { + const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; + if (probability < 0) { + return NOT_A_PROBABILITY; + } else { + return min(probability, MAX_UNIGRAM_PROBABILITY); + } +} + +/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) { + const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; + if (probabilityDelta < 0) { + return NOT_A_PROBABILITY; + } else { + return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA); + } +} + +/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) { + return rawProbability; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h new file mode 100644 index 000000000..1ca03918f --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DECAYING_UTILS_H +#define LATINIME_DECAYING_UTILS_H + +#include "defines.h" + +namespace latinime { + +// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is +// required to introduced to each terminal PtNode and bigram entry. +// TODO: Quit using bigram probability to indicate the delta. +// TODO: Quit using bigram probability delta. +class DecayingUtils { + public: + static const int MAX_UNIGRAM_COUNT; + static const int MAX_UNIGRAM_COUNT_AFTER_GC; + static const int MAX_BIGRAM_COUNT; + static const int MAX_BIGRAM_COUNT_AFTER_GC; + + static int getProbability(const int encodedUnigramProbability, + const int encodedBigramProbabilityDelta); + + static int getUpdatedUnigramProbability(const int originalEncodedProbability, + const int newProbability); + + static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta, + const int newProbability); + + static int isValidUnigram(const int encodedUnigramProbability); + + static int isValidBigram(const int encodedProbabilityDelta); + + static int getUnigramProbabilityToSave(const int encodedProbability); + + static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils); + + static const int MAX_COMPUTED_PROBABILITY; + static const int MAX_UNIGRAM_PROBABILITY; + static const int MIN_VALID_UNIGRAM_PROBABILITY; + static const int UNIGRAM_PROBABILITY_STEP; + static const int MAX_BIGRAM_PROBABILITY_DELTA; + static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA; + static const int BIGRAM_PROBABILITY_DELTA_STEP; + + static int decodeUnigramProbability(const int encodedProbability); + + static int decodeBigramProbabilityDelta(const int encodedProbability); + + static int getDecayedProbability(const int rawProbability); +}; +} // namespace latinime +#endif /* LATINIME_DECAYING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp new file mode 100644 index 000000000..f22e94c6a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" + +#include <cstdio> +#include <cstring> + +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" + +namespace latinime { + +const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp"; + +/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath, + const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + switch (dictVersion) { + case 3: + return createEmptyV3DictFile(filePath, attributeMap); + default: + // Only version 3 dictionary is supported for now. + return false; + } +} + +/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); + headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, + 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); + BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { + return false; + } + return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); +} + +/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { + const int tmpFileNameBufSize = strlen(filePath) + + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; + // Name of a temporary file used for writing that is a connected string of original name and + // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. + char tmpFileName[tmpFileNameBufSize]; + snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + FILE *const file = fopen(tmpFileName, "wb"); + if (!file) { + AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + ASSERT(false); + return false; + } + // Write the dictionary header. + if (!writeBufferToFile(file, dictHeader)) { + remove(tmpFileName); + AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); + ASSERT(false); + return false; + } + // Write the dictionary body. + if (!writeBufferToFile(file, dictBody)) { + remove(tmpFileName); + AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); + ASSERT(false); + return false; + } + fclose(file); + rename(tmpFileName, filePath); + return true; +} + +// This closes file pointer when an error is caused and returns whether the writing was succeeded +// or not. +/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer) { + const int originalBufSize = buffer->getOriginalBufferSize(); + if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), + originalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + const int additionalBufSize = buffer->getUsedAdditionalBufferSize(); + if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), + additionalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h new file mode 100644 index 000000000..bd4ac66fd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H +#define LATINIME_DICT_FILE_WRITING_UTILS_H + +#include <cstdio> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class DictFileWritingUtils { + public: + static bool createEmptyDictFile(const char *const filePath, const int dictVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, + BufferWithExtendableBuffer *const dictBody); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils); + + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + + static bool createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer); +}; +} // namespace latinime +#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp index 408b12ae9..5b6b5e874 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp @@ -47,7 +47,7 @@ ErrorType TypingWeighting::getErrorType(const CorrectionType correctionType, case CT_TERMINAL_INSERTION: case CT_TRANSPOSITION: return ET_EDIT_CORRECTION; - case CT_NEW_WORD_SPACE_OMITTION: + case CT_NEW_WORD_SPACE_OMISSION: case CT_NEW_WORD_SPACE_SUBSTITUTION: return ET_NEW_WORD; case CT_TERMINAL: diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index b6aa85896..9f0a331e3 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -74,7 +74,8 @@ class TypingWeighting : public Weighting { // Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on // the keyboard (like accented letters) const float normalizedSquaredLength = traverseSession->getProximityInfoState(0) - ->getPointToKeyLength(pointIndex, dicNode->getNodeCodePoint()); + ->getPointToKeyLength(pointIndex, + CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float normalizedDistance = TouchPositionCorrectionUtils::getSweetSpotFactor( traverseSession->isTouchPositionCorrectionEnabled(), normalizedSquaredLength); const float weightedDistance = ScoringParams::DISTANCE_WEIGHT_LENGTH * normalizedDistance; @@ -113,10 +114,10 @@ class TypingWeighting : public Weighting { const int16_t parentPointIndex = parentDicNode->getInputIndex(0); const int prevCodePoint = parentDicNode->getNodeCodePoint(); const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex + 1, prevCodePoint); + parentPointIndex + 1, CharUtils::toBaseLowerCase(prevCodePoint)); const int codePoint = dicNode->getNodeCodePoint(); const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex, codePoint); + parentPointIndex, CharUtils::toBaseLowerCase(codePoint)); const float distance = distance1 + distance2; const float weightedLengthDistance = distance * ScoringParams::DISTANCE_WEIGHT_LENGTH; @@ -133,7 +134,7 @@ class TypingWeighting : public Weighting { const bool existsAdjacentProximityChars = traverseSession->getProximityInfoState(0) ->existsAdjacentProximityChars(insertedPointIndex); const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - insertedPointIndex + 1, dicNode->getNodeCodePoint()); + insertedPointIndex + 1, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH; const bool singleChar = dicNode->getNodeCodePointCount() == 1; float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f); |