aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp31
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp17
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp113
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h31
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h4
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java63
20 files changed, 189 insertions, 235 deletions
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index c2a15a312..2a62b555b 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -163,7 +163,8 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPos) {
+ if (bigramsIt.getBigramPos() == nextWordPos
+ && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
return mDictionaryStructurePolicy->getProbability(
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
bigramsIt.getProbability());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 968bacee6..cd2243025 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -34,7 +35,12 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
bigramEntry.getTargetTerminalId());
}
if (outProbability) {
- *outProbability = bigramEntry.getProbability();
+ if (bigramEntry.hasHistoricalInfo()) {
+ *outProbability =
+ ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
+ } else {
+ *outProbability = bigramEntry.getProbability();
+ }
}
if (outHasNext) {
*outHasNext = bigramEntry.hasNext();
@@ -152,17 +158,12 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
- } else if (mNeedsToDecayWhenUpdating) {
- const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- bigramEntry.getProbability(), mHeaderPolicy);
- const HistoricalInfo historicalInfo =
- ForgettingCurveUtils::createHistoricalInfoToSave(
- bigramEntry.getHistoricalInfo());
- // TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
- if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
+ } else if (bigramEntry.hasHistoricalInfo()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ bigramEntry.getHistoricalInfo());
+ if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
const BigramEntry updatedBigramEntry =
- bigramEntry.updateProbabilityAndGetEntry(probability)
- .updateHistoricalInfoAndGetEntry(&historicalInfo);
+ bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
@@ -225,14 +226,12 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const {
- if (mNeedsToDecayWhenUpdating) {
- const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
- originalBigramEntry->getProbability(), newProbability);
+ // TODO: Consolidate historical info and probability.
+ if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
- return originalBigramEntry->updateProbabilityAndGetEntry(probability)
- .updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
+ return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index 972144100..5b6c5a173 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -24,18 +24,17 @@
namespace latinime {
class BigramDictContent;
-class DictionaryHeaderStructurePolicy;
+class HeaderPolicy;
class TerminalPositionLookupTable;
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable,
- const DictionaryHeaderStructurePolicy *const headerPolicy,
- const bool needsToDecayWhenUpdating)
+ const HeaderPolicy *const headerPolicy)
: mBigramDictContent(bigramDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable),
- mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
+ mHeaderPolicy(headerPolicy) {}
void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const;
@@ -64,8 +63,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
- const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
- const bool mNeedsToDecayWhenUpdating;
+ const HeaderPolicy *const mHeaderPolicy;
};
} // namespace latinime
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 29e937b3a..d71e0a068 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -93,6 +93,18 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
}
}
+ AK_FORCE_INLINE bool isValid() const {
+ // Decaying dictionary must have historical information.
+ if (!mIsDecayingDict) {
+ return true;
+ }
+ if (mHasHistoricalInfoOfWords) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
AK_FORCE_INLINE int getSize() const {
return mSize;
}
@@ -137,7 +149,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize;
}
- AK_FORCE_INLINE bool hasHistricalInfoOfWords() const {
+ AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
return mHasHistoricalInfoOfWords;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
index d4175355e..bcba67035 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -20,7 +20,6 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
-#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -30,8 +29,7 @@ bool DynamicPatriciaTrieGcEventListeners
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
bool isUselessPtNode = !ptNodeParams->isTerminal();
- // TODO: Quit checking mNeedsToDecayWhenUpdating.
- if (ptNodeParams->isTerminal() && mNeedsToDecayWhenUpdating) {
+ if (ptNodeParams->isTerminal()) {
bool needsToKeepPtNode = true;
if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
&needsToKeepPtNode)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
index edd692898..562eb69bd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h
@@ -27,8 +27,6 @@
namespace latinime {
-class DictionaryHeaderStructurePolicy;
-class PtNodeWriter;
class PtNodeParams;
// TODO: Move to pt_common.
@@ -41,12 +39,9 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- const DictionaryHeaderStructurePolicy *const headerPolicy,
- PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer,
- const bool needsToDecayWhenUpdating)
- : mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer),
- mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating), mValueStack(),
- mChildrenValue(0), mValidUnigramCount(0) {}
+ PtNodeWriter *const ptNodeWriter)
+ : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
+ mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -77,10 +72,7 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
- const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
PtNodeWriter *const mPtNodeWriter;
- BufferWithExtendableBuffer *const mBuffer;
- const bool mNeedsToDecayWhenUpdating;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
index 805014506..2b0cbd93b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
@@ -73,6 +73,10 @@ class BigramEntry {
return mProbability;
}
+ bool hasHistoricalInfo() const {
+ return mHistoricalInfo.isValid();
+ }
+
const HistoricalInfo *getHistoricalInfo() const {
return &mHistoricalInfo;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index d1b28cc0a..36ba82be1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -51,6 +51,10 @@ class ProbabilityEntry {
return ProbabilityEntry(mFlags, mProbability, historicalInfo);
}
+ bool hasHistoricalInfo() const {
+ return mHistoricalInfo.isValid();
+ }
+
int getFlags() const {
return mFlags;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 7e283f437..153d8990a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -45,9 +45,9 @@ class Ver4DictBuffers {
}
AK_FORCE_INLINE bool isValid() const {
- return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
- && mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
- && mShortcutDictContent.isValid();
+ return mDictBuffer.get() != 0 && mHeaderPolicy.isValid()
+ && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
+ && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
}
AK_FORCE_INLINE bool isNearSizeLimit() const {
@@ -131,9 +131,10 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
// TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderPolicy.getSize()),
- mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistricalInfoOfWords(),
+ mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
+ isUpdatable),
+ mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
isUpdatable),
- mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistricalInfoOfWords(), isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
@@ -142,8 +143,8 @@ class Ver4DictBuffers {
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(),
- mProbabilityDictContent(headerPolicy->hasHistricalInfoOfWords()),
- mBigramDictContent(headerPolicy->hasHistricalInfoOfWords()), mShortcutDictContent(),
+ mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+ mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 5fa657d01..8ea029076 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -22,6 +22,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -62,7 +63,12 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
const ProbabilityEntry probabilityEntry =
mProbabilityDictContent->getProbabilityEntry(terminalId);
- probability = probabilityEntry.getProbability();
+ if (probabilityEntry.hasHistoricalInfo()) {
+ probability = ForgettingCurveUtils::decodeProbability(
+ probabilityEntry.getHistoricalInfo());
+ } else {
+ probability = probabilityEntry.getProbability();
+ }
}
int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 07554342f..b60499e9f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
@@ -147,27 +148,21 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
return false;
}
- if (mBuffers->getHeaderPolicy()->isDecayingDict()) {
- const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
- toBeUpdatedPtNodeParams->getTerminalId());
- // TODO: Remove.
- const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
- const HistoricalInfo historicalInfo =
- ForgettingCurveUtils::createHistoricalInfoToSave(
- originalProbabilityEntry.getHistoricalInfo());
+ const ProbabilityEntry originalProbabilityEntry =
+ mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ toBeUpdatedPtNodeParams->getTerminalId());
+ if (originalProbabilityEntry.hasHistoricalInfo()) {
+ const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
+ originalProbabilityEntry.getHistoricalInfo());
const ProbabilityEntry probabilityEntry =
- originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability)
- .createEntryWithUpdatedHistoricalInfo(&historicalInfo);
+ originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
return false;
}
- // TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
- const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
+ const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo);
if (!isValid) {
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
@@ -380,14 +375,13 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
const int timestamp) const {
- if (mNeedsToDecayWhenUpdating) {
- const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
- originalProbabilityEntry->getProbability(), newProbability);
+ // TODO: Consolidate historical info and probability.
+ if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
- return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
- .createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
+ return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
+ &updatedHistoricalInfo);
} else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index b0b5b70f2..1df853a30 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -40,12 +40,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
- Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy,
- const bool needsToDecayWhenUpdating)
+ Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
mReadingHelper(mTrieBuffer, mPtNodeReader),
- mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
- mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@@ -120,7 +118,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
- const bool mNeedsToDecayWhenUpdating;
};
} // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 3447dce93..7b5461d74 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -34,8 +34,6 @@ const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
-const char *const Ver4PatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
- "SET_NEEDS_TO_DECAY_FOR_TESTING";
const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT =
"SET_CURRENT_TIME_FOR_TESTING:%d";
const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
@@ -62,8 +60,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
- isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
- != NOT_A_PROBABILITY;
+ isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
@@ -262,11 +259,7 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
- const bool needsToDecay = mHeaderPolicy->isDecayingDict()
- && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
- false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, mHeaderPolicy));
- mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, needsToDecay);
- mNeedsToDecayForTesting = false;
+ mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -286,8 +279,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy->isDecayingDict()) {
- return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
- mindsBlockByGC, mUnigramCount, mBigramCount, mHeaderPolicy);
+ return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+ mHeaderPolicy);
}
return false;
}
@@ -308,8 +301,6 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
- } else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
- mNeedsToDecayForTesting = true;
} else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) {
TimeKeeper::startTestModeWithForceCurrentTime(timestamp);
} else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index d2d2ead50..2d46c0ae2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -41,17 +41,16 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
: mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(),
- mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy,
- mHeaderPolicy->isDecayingDict()),
+ mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
- &mShortcutPolicy, mHeaderPolicy->isDecayingDict()),
+ &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
- mBigramCount(mHeaderPolicy->getBigramCount()), mNeedsToDecayForTesting(false) {};
+ mBigramCount(mHeaderPolicy->getBigramCount()) {};
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@@ -117,7 +116,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const BIGRAM_COUNT_QUERY;
static const char *const MAX_UNIGRAM_COUNT_QUERY;
static const char *const MAX_BIGRAM_COUNT_QUERY;
- static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT;
static const char *const GET_CURRENT_TIME_QUERY;
static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY;
@@ -137,7 +135,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount;
int mBigramCount;
- bool mNeedsToDecayForTesting;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 6ab460ff6..21d009ecb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -53,20 +53,20 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePa
}
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
- const char *const trieFilePath, const bool needsToDecay) {
+ const char *const trieFilePath) {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
int unigramCount = 0;
int bigramCount = 0;
- if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount,
- needsToDecay)) {
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
return;
}
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
- needsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
+ true /* updatesLastDecayedTime */, unigramCount, bigramCount,
+ 0 /* extendedRegionSize */)) {
return;
}
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
@@ -77,30 +77,29 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
- int *const outUnigramCount, int *const outBigramCount, const bool needsToDecay) {
+ int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
- mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay);
+ mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
- mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
- false /* needsToDecayWhenUpdating */);
+ mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- headerPolicy, &ptNodeWriter, mBuffers->getWritableTrieBuffer(),
- needsToDecay);
+ &ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
- if (needsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ if (headerPolicy->isDecayingDict()
+ && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams.
}
@@ -111,8 +110,9 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) {
return false;
}
- if (needsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
- > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ if (headerPolicy->isDecayingDict()
+ && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
+ > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
}
@@ -120,8 +120,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
- false /* needsToDecayWhenUpdating */);
+ buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@@ -134,13 +133,11 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
- buffersToWrite->getTerminalPositionLookupTable(), headerPolicy,
- false /* needsToDecay */);
+ buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
- buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy,
- false /* needsToDecayWhenUpdating */);
+ buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
// Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
index 505012b4f..82877fdcc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -36,7 +36,7 @@ class Ver4PatriciaTrieWritingHelper {
const int bigramCount) const;
void writeToDictFileWithGC(const int rootPtNodeArrayPos,
- const char *const trieFilePath, const bool needsToDecay);
+ const char *const trieFilePath);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
@@ -66,7 +66,7 @@ class Ver4PatriciaTrieWritingHelper {
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
- int *const outBigramCount, const bool needsToDecay);
+ int *const outBigramCount);
Ver4DictBuffers *const mBuffers;
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 466af7256..4050ad363 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -31,12 +31,6 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
-const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
-const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
-const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
-// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
-// duration of the decay is approximately 66hours.
-const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3;
@@ -53,9 +47,9 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
const int newProbability, const int timestamp) {
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
- } else if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
+ } else if (!originalHistoricalInfo->isValid()) {
// Initial information.
- return HistoricalInfo(timestamp, 0 /* level */, 0 /* count */);
+ return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
} else {
const int updatedCount = originalHistoricalInfo->getCount() + 1;
if (updatedCount > MAX_COUNT) {
@@ -75,85 +69,46 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
}
-/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbability) {
- if (encodedUnigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
- return backoff(decodeProbability(encodedUnigramProbability));
- } else {
- const int unigramProbability = decodeProbability(encodedUnigramProbability);
- const int bigramProbability = decodeProbability(encodedBigramProbability);
- return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
- }
+/* static */ int ForgettingCurveUtils::decodeProbability(
+ const HistoricalInfo *const historicalInfo) {
+ const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
+ return sProbabilityTable.getProbability(historicalInfo->getLevel(),
+ min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
}
-// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
-// (i.e. unigram probability + bigram probability delta).
-/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
- const int originalEncodedProbability, const int newProbability) {
- if (originalEncodedProbability == NOT_A_PROBABILITY) {
- // The bigram relation is not in this dictionary.
- if (newProbability == NOT_A_PROBABILITY) {
- // The bigram target is not in other dictionaries.
- return 0;
- } else {
- return MIN_VALID_ENCODED_PROBABILITY;
- }
+/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
+ const int bigramProbability) {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY);
} else {
- if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
- return MIN_VALID_ENCODED_PROBABILITY;
- }
- return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
+ return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
-/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
- return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
-}
-
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
return historicalInfo->getLevel() > 0
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp())
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
-/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
- const DictionaryHeaderStructurePolicy *const headerPolicy) {
- const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
- const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
- int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
- // TODO: Implement the decay in more proper way.
- for (int i = 0; i < decayIterationCount; ++i) {
- const float currentRate = static_cast<float>(currentEncodedProbability)
- / static_cast<float>(MAX_ENCODED_PROBABILITY);
- const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
- const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
- if (thresholdToDecay < randValue) {
- currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
- 0);
- }
- }
- return currentEncodedProbability;
-}
-
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo) {
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
return HistoricalInfo();
}
const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp());
- if (elapsedTimeStep < MAX_ELAPSED_TIME_STEP_COUNT) {
+ if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
// No need to update historical info.
return *originalHistoricalInfo;
}
// Level down.
- const int maxLevelDownAmonut = elapsedTimeStep / MAX_ELAPSED_TIME_STEP_COUNT;
+ const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() +
- levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS;
+ levelDownAmount * (MAX_ELAPSED_TIME_STEP_COUNT + 1) * TIME_STEP_DURATION_IN_SECONDS;
return HistoricalInfo(adjustedTimestamp,
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
}
@@ -179,14 +134,6 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return false;
}
-/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
- if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else {
- return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
- }
-}
-
// See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) {
@@ -201,14 +148,24 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
- // Table entry is as follows:
- // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
- // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
- mTable.resize(MAX_ENCODED_PROBABILITY + 1);
- for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
- const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
- static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
- mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+ mTable.resize(MAX_LEVEL + 1);
+ for (int level = 0; level <= MAX_LEVEL; ++level) {
+ mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
+ const float initialProbability =
+ static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
+ for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) {
+ if (level == 0) {
+ mTable[level][timeStepCount] = NOT_A_PROBABILITY;
+ continue;
+ }
+ const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
+ const float probability = initialProbability
+ * powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
+ / static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
+ * (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
+ mTable[level][timeStepCount] =
+ min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
+ }
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 76d172e0b..6ac8dc528 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -26,8 +26,6 @@ namespace latinime {
class DictionaryHeaderStructurePolicy;
-// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
-// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
@@ -43,22 +41,13 @@ class ForgettingCurveUtils {
static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo);
+ static int decodeProbability(const HistoricalInfo *const historicalInfo);
+
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
- // TODO: Remove.
- static int getUpdatedEncodedProbability(const int originalEncodedProbability,
- const int newProbability);
-
- // TODO: Remove.
- static int isValidEncodedProbability(const int encodedProbability);
-
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
- // TODO: Remove.
- static int getEncodedProbabilityToSave(const int encodedProbability,
- const DictionaryHeaderStructurePolicy *const headerPolicy);
-
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
@@ -69,24 +58,17 @@ class ForgettingCurveUtils {
public:
ProbabilityTable();
- int getProbability(const int encodedProbability) const {
- if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
- return NOT_A_PROBABILITY;
- }
- return mTable[encodedProbability];
+ int getProbability(const int level, const int elapsedTimeStepCount) const {
+ return mTable[level][elapsedTimeStepCount];
}
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
- std::vector<int> mTable;
+ std::vector<std::vector<int> > mTable;
};
static const int MAX_COMPUTED_PROBABILITY;
- static const int MAX_ENCODED_PROBABILITY;
- static const int MIN_VALID_ENCODED_PROBABILITY;
- static const int ENCODED_PROBABILITY_STEP;
- static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL;
@@ -95,11 +77,10 @@ class ForgettingCurveUtils {
static const int TIME_STEP_DURATION_IN_SECONDS;
static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+ static const int HALF_LIFE_TIME_IN_SECONDS;
static const ProbabilityTable sProbabilityTable;
- static int decodeProbability(const int encodedProbability);
-
static int backoff(const int unigramProbability);
static int getElapsedTimeStepCount(const int timestamp);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
index 64c0136a6..428ca8626 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h
@@ -30,6 +30,10 @@ class HistoricalInfo {
HistoricalInfo(const int timestamp, const int level, const int count)
: mTimestamp(timestamp), mLevel(level), mCount(count) {}
+ bool isValid() const {
+ return mTimestamp != NOT_A_TIMESTAMP;
+ }
+
int getTimeStamp() const {
return mTimestamp;
}
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index 1ea54eae5..d77a11f01 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -30,6 +30,7 @@ import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
+import java.util.concurrent.TimeUnit;
@LargeTest
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
@@ -38,8 +39,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
// Note that these are corresponding definitions in native code in
// latinime::Ver4PatriciaTriePolicy.
- private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY =
- "SET_NEEDS_TO_DECAY_FOR_TESTING";
private static final String SET_CURRENT_TIME_FOR_TESTING_QUERY =
"SET_CURRENT_TIME_FOR_TESTING";
private static final String GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
@@ -47,14 +46,28 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private static final int DUMMY_PROBABILITY = 0;
+ private int mCurrentTime = 0;
+
@Override
protected void setUp() throws Exception {
super.setUp();
+ mCurrentTime = 0;
}
@Override
protected void tearDown() throws Exception {
super.tearDown();
+ try {
+ final File dictFile =
+ createEmptyDictionaryAndGetFile("TestBinaryDictionary", FormatSpec.VERSION4);
+ final BinaryDictionary binaryDictionary =
+ new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
+ dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
+ TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary.getPropertyForTests(QUIT_TIMEKEEPER_TEST_MODE_QUERY);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
}
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
@@ -62,32 +75,29 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ mCurrentTime /* timestamp */);
}
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
final String word1, final int probability) {
binaryDictionary.addBigramWords(word0, word1, probability,
- BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+ mCurrentTime /* timestamp */);
}
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
- // Entries having low probability would be suppressed once in 3 GCs.
- final int count = 3;
- for (int i = 0; i < count; i++) {
- binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
- binaryDictionary.flushWithGC();
- }
+ // 4 days.
+ final int timeToElapse = (int)TimeUnit.SECONDS.convert(4, TimeUnit.DAYS);
+ mCurrentTime += timeToElapse;
+ setCurrentTime(binaryDictionary, mCurrentTime);
+ binaryDictionary.flushWithGC();
}
private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
- // Currently, probabilities are decayed when GC is run. All entries that have never been
- // typed in 128 GCs would be removed.
- final int count = 128;
- for (int i = 0; i < count; i++) {
- binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
- binaryDictionary.flushWithGC();
- }
+ // 60 days.
+ final int timeToElapse = (int)TimeUnit.SECONDS.convert(60, TimeUnit.DAYS);
+ mCurrentTime += timeToElapse;
+ setCurrentTime(binaryDictionary, mCurrentTime);
+ binaryDictionary.flushWithGC();
}
private File createEmptyDictionaryAndGetFile(final String dictId,
@@ -147,6 +157,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ binaryDictionary.getPropertyForTests(QUIT_TIMEKEEPER_TEST_MODE_QUERY);
final int startTime = getCurrentTime(binaryDictionary);
for (int i = 0; i < TEST_COUNT; i++) {
final int currentTime = random.nextInt(Integer.MAX_VALUE);
@@ -233,6 +244,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidWord("a"));
forcePassingShortTime(binaryDictionary);
assertTrue(binaryDictionary.isValidWord("a"));
forcePassingLongTime(binaryDictionary);
@@ -257,6 +270,9 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
+ addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
+ addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
assertTrue(binaryDictionary.isValidBigram("a", "b"));
forcePassingShortTime(binaryDictionary);
assertTrue(binaryDictionary.isValidBigram("a", "b"));
@@ -307,8 +323,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
- binaryDictionary.flushWithGC();
+ forcePassingShortTime(binaryDictionary);
}
final int unigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTests(
@@ -321,6 +336,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
+ forcePassingLongTime(binaryDictionary);
+ assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTests(
+ BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+
}
public void testAddManyBigramsToDecayingDict() {
@@ -378,8 +397,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
- binaryDictionary.flushWithGC();
+ forcePassingShortTime(binaryDictionary);
}
final int bigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTests(
@@ -392,5 +410,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
+ forcePassingLongTime(binaryDictionary);
+ assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTests(
+ BinaryDictionary.BIGRAM_COUNT_QUERY)));
}
}