diff options
Diffstat (limited to 'native/jni')
6 files changed, 82 insertions, 27 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 628d3ab38..dc2adb44e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -20,6 +20,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -46,10 +47,12 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget if (!mBigramDictContent->createNewBigramList(terminalId)) { return false; } + const int probabilityToWrite = getUpdatedProbability( + NOT_A_PROBABILITY /* originalProbability */, newProbability); // Write an entry. - int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, - false /* hasNext */, newTargetTerminalId, &writingPos)) { + const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */, + newTargetTerminalId, writingPos)) { return false; } if (outAddedNewEntry) { @@ -61,19 +64,18 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); if (entryPosToUpdate != NOT_A_DICT_POS) { // Overwrite existing entry. - int readingPos = entryPosToUpdate; bool hasNext = false; int probability = NOT_A_PROBABILITY; int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, - &targetTerminalId, &readingPos); + mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId, + entryPosToUpdate); + const int probabilityToWrite = getUpdatedProbability(probability, newProbability); if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) { // Reuse invalid entry. *outAddedNewEntry = true; } - int writingPos = entryPosToUpdate; - return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext, - newTargetTerminalId, &writingPos); + return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext, + newTargetTerminalId, entryPosToUpdate); } // Add new entry to the bigram list. @@ -83,7 +85,9 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget } // Write new entry at a head position of the bigram list. int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + const int probabilityToWrite = getUpdatedProbability( + NOT_A_PROBABILITY /* originalProbability */, newProbability); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite, true /* hasNext */, newTargetTerminalId, &writingPos)) { return false; } @@ -105,20 +109,18 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer // Bigram entry doesn't exist. return false; } - int readingPos = entryPosToUpdate; bool hasNext = false; int probability = NOT_A_PROBABILITY; int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, - &originalTargetTerminalId, &readingPos); + mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId, + entryPosToUpdate); if (targetTerminalId != originalTargetTerminalId) { // Bigram entry doesn't exist. return false; } - int writingPos = entryPosToUpdate; // Remove bigram entry by overwriting target terminal Id. - return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext, - Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); + return mBigramDictContent->writeBigramEntry(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate); } bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, @@ -143,9 +145,28 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i targetTerminalId); if (targetPtNodePos == NOT_A_DICT_POS) { // Invalidate bigram entry. - int writingPos = entryPos; - return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext, - Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); + if (!mBigramDictContent->writeBigramEntry(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) { + return false; + } + } else if (mNeedsToDecayWhenUpdating) { + probability = ForgettingCurveUtils::getEncodedProbabilityToSave( + probability, mHeaderPolicy); + if (ForgettingCurveUtils::isValidEncodedProbability(probability)) { + if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId, + entryPos)) { + return false; + } + *outBigramCount += 1; + } else { + // Remove entry. + if (!mBigramDictContent->writeBigramEntry(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) { + return false; + } + } + } else { + *outBigramCount += 1; } } return true; @@ -192,4 +213,14 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, return invalidEntryPos; } +int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability, + const int newProbability) const { + if (mNeedsToDecayWhenUpdating) { + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); + } else { + return newProbability; + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index 5b7d5b527..ed8bdb84d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -23,14 +23,18 @@ namespace latinime { class BigramDictContent; +class DictionaryHeaderStructurePolicy; class TerminalPositionLookupTable; class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { public: Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, - const TerminalPositionLookupTable *const terminalPositionLookupTable) + const TerminalPositionLookupTable *const terminalPositionLookupTable, + const DictionaryHeaderStructurePolicy *const headerPolicy, + const bool needsToDecayWhenUpdating) : mBigramDictContent(bigramDictContent), - mTerminalPositionLookupTable(terminalPositionLookupTable) {} + mTerminalPositionLookupTable(terminalPositionLookupTable), + mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {} void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const; @@ -54,8 +58,12 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; + int getUpdatedProbability(const int originalProbability, const int newProbability) const; + BigramDictContent *const mBigramDictContent; const TerminalPositionLookupTable *const mTerminalPositionLookupTable; + const DictionaryHeaderStructurePolicy *const mHeaderPolicy; + const bool mNeedsToDecayWhenUpdating; }; } // namespace latinime #endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index 906687647..4cd96722e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -103,6 +103,7 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap * return true; } +// Returns whether GC for the bigram list was succeeded or not. bool BigramDictContent::runGCBigramList(const int bigramListPos, const BigramDictContent *const sourceBigramDictContent, const int toPos, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, @@ -121,9 +122,8 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->find(targetTerminalId); if (it == terminalIdMap->end()) { - AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd", - targetTerminalId, terminalIdMap->size()); - return false; + // Target word has been removed. + continue; } if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second, &writingPos)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index ec0d756d8..cf380f403 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -38,6 +38,13 @@ class BigramDictContent : public SparseTableDictContent { : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + void getBigramEntry(int *const outProbability, bool *const outHasNext, + int *const outTargetTerminalId, const int bigramEntryPos) const { + int readingPos = bigramEntryPos; + getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId, + &readingPos); + } + void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const; @@ -50,6 +57,13 @@ class BigramDictContent : public SparseTableDictContent { return addressLookupTable->get(terminalId); } + bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId, + const int entryWritingPos) { + int writingPos = entryWritingPos; + return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, + &writingPos); + } + bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, const int targetTerminalId, int *const entryWritingPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 6fe978d0f..3606a2ae9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -43,7 +43,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4), mDictBuffer(mBuffers.get()->getWritableTrieBuffer()), mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(), - mBuffers.get()->getTerminalPositionLookupTable()), + mBuffers.get()->getTerminalPositionLookupTable(), &mHeaderPolicy, + mHeaderPolicy.isDecayingDict()), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index f141d52f5..ea03c72fa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -83,7 +83,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), mBuffers->getProbabilityDictContent()); Ver4BigramListPolicy bigramPolicy(mBuffers->getUpdatableBigramDictContent(), - mBuffers->getTerminalPositionLookupTable()); + mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay); Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), @@ -134,7 +134,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), buffersToWrite->getProbabilityDictContent()); Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(), - buffersToWrite->getTerminalPositionLookupTable()); + buffersToWrite->getTerminalPositionLookupTable(), headerPolicy, + false /* needsToDecay */); Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(), buffersToWrite->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), |