aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp5
6 files changed, 82 insertions, 27 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 628d3ab38..dc2adb44e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -20,6 +20,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -46,10 +47,12 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
+ const int probabilityToWrite = getUpdatedProbability(
+ NOT_A_PROBABILITY /* originalProbability */, newProbability);
// Write an entry.
- int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
- false /* hasNext */, newTargetTerminalId, &writingPos)) {
+ const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */,
+ newTargetTerminalId, writingPos)) {
return false;
}
if (outAddedNewEntry) {
@@ -61,19 +64,18 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
if (entryPosToUpdate != NOT_A_DICT_POS) {
// Overwrite existing entry.
- int readingPos = entryPosToUpdate;
bool hasNext = false;
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
- &targetTerminalId, &readingPos);
+ mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
+ entryPosToUpdate);
+ const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
// Reuse invalid entry.
*outAddedNewEntry = true;
}
- int writingPos = entryPosToUpdate;
- return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext,
- newTargetTerminalId, &writingPos);
+ return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext,
+ newTargetTerminalId, entryPosToUpdate);
}
// Add new entry to the bigram list.
@@ -83,7 +85,9 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
}
// Write new entry at a head position of the bigram list.
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
+ const int probabilityToWrite = getUpdatedProbability(
+ NOT_A_PROBABILITY /* originalProbability */, newProbability);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite,
true /* hasNext */, newTargetTerminalId, &writingPos)) {
return false;
}
@@ -105,20 +109,18 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram entry doesn't exist.
return false;
}
- int readingPos = entryPosToUpdate;
bool hasNext = false;
int probability = NOT_A_PROBABILITY;
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
- &originalTargetTerminalId, &readingPos);
+ mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
+ entryPosToUpdate);
if (targetTerminalId != originalTargetTerminalId) {
// Bigram entry doesn't exist.
return false;
}
- int writingPos = entryPosToUpdate;
// Remove bigram entry by overwriting target terminal Id.
- return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
- Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
+ return mBigramDictContent->writeBigramEntry(probability, hasNext,
+ Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate);
}
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
@@ -143,9 +145,28 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
targetTerminalId);
if (targetPtNodePos == NOT_A_DICT_POS) {
// Invalidate bigram entry.
- int writingPos = entryPos;
- return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
- Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
+ if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
+ Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
+ return false;
+ }
+ } else if (mNeedsToDecayWhenUpdating) {
+ probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
+ probability, mHeaderPolicy);
+ if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
+ if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId,
+ entryPos)) {
+ return false;
+ }
+ *outBigramCount += 1;
+ } else {
+ // Remove entry.
+ if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
+ Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
+ return false;
+ }
+ }
+ } else {
+ *outBigramCount += 1;
}
}
return true;
@@ -192,4 +213,14 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
return invalidEntryPos;
}
+int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability,
+ const int newProbability) const { // Selects the probability value to write for an entry.
+ if (mNeedsToDecayWhenUpdating) { // Decay enabled: delegate to ForgettingCurveUtils.
+ return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+ newProbability); // Both the original and the new value are passed to the helper.
+ } else {
+ return newProbability; // Decay disabled: newProbability is returned unchanged.
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index 5b7d5b527..ed8bdb84d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -23,14 +23,18 @@
namespace latinime {
class BigramDictContent;
+class DictionaryHeaderStructurePolicy;
class TerminalPositionLookupTable;
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
- const TerminalPositionLookupTable *const terminalPositionLookupTable)
+ const TerminalPositionLookupTable *const terminalPositionLookupTable,
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
+ const bool needsToDecayWhenUpdating)
: mBigramDictContent(bigramDictContent),
- mTerminalPositionLookupTable(terminalPositionLookupTable) {}
+ mTerminalPositionLookupTable(terminalPositionLookupTable),
+ mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const;
@@ -54,8 +58,12 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
+ int getUpdatedProbability(const int originalProbability, const int newProbability) const;
+
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
+ const bool mNeedsToDecayWhenUpdating;
};
} // namespace latinime
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 906687647..4cd96722e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -103,6 +103,7 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
return true;
}
+// Returns whether GC for the bigram list succeeded.
bool BigramDictContent::runGCBigramList(const int bigramListPos,
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
@@ -121,9 +122,8 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
terminalIdMap->find(targetTerminalId);
if (it == terminalIdMap->end()) {
- AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
- targetTerminalId, terminalIdMap->size());
- return false;
+ // Target word has been removed.
+ continue;
}
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
&writingPos)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index ec0d756d8..cf380f403 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -38,6 +38,13 @@ class BigramDictContent : public SparseTableDictContent {
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
+ void getBigramEntry(int *const outProbability, bool *const outHasNext,
+ int *const outTargetTerminalId, const int bigramEntryPos) const { // Reads one entry by value position.
+ int readingPos = bigramEntryPos; // Local copy: the callee takes a pointer to the position.
+ getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId,
+ &readingPos); // Any change the callee makes to readingPos is discarded here.
+ }
+
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
int *const outTargetTerminalId, int *const bigramEntryPos) const;
@@ -50,6 +57,13 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
+ bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId,
+ const int entryWritingPos) { // NOTE(review): hasNext is an int here but callers pass a bool.
+ int writingPos = entryWritingPos; // Local copy: the callee takes a pointer to the position.
+ return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
+ &writingPos); // Returns the callee's result; the updated writingPos is discarded.
+ }
+
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
const int targetTerminalId, int *const entryWritingPos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 6fe978d0f..3606a2ae9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -43,7 +43,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
- mBuffers.get()->getTerminalPositionLookupTable()),
+ mBuffers.get()->getTerminalPositionLookupTable(), &mHeaderPolicy,
+ mHeaderPolicy.isDecayingDict()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index f141d52f5..ea03c72fa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -83,7 +83,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent());
Ver4BigramListPolicy bigramPolicy(mBuffers->getUpdatableBigramDictContent(),
- mBuffers->getTerminalPositionLookupTable());
+ mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
@@ -134,7 +134,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
- buffersToWrite->getTerminalPositionLookupTable());
+ buffersToWrite->getTerminalPositionLookupTable(), headerPolicy,
+ false /* needsToDecay */);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),