aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2013-12-09 21:02:41 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2013-12-09 21:02:41 +0900
commit 26266bd53b06facd4ef7da3de6e0a52ea82b4482 (patch)
tree 778ef65fac96632fec539da05a75ff9da7890574
parent ebb57c02c2c2dd97fab118b77cd9217e5ad03ec1 (diff)
download latinime-26266bd53b06facd4ef7da3de6e0a52ea82b4482.tar.gz
latinime-26266bd53b06facd4ef7da3de6e0a52ea82b4482.tar.xz
latinime-26266bd53b06facd4ef7da3de6e0a52ea82b4482.zip
Update historical info for GC.
Bug: 11073222
Change-Id: I08a61c02f9f5d527897095eee2de395f86050e2d
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 35
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h 15
4 files changed, 63 insertions, 8 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index ad437b1a5..968bacee6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -153,12 +153,16 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
return false;
}
} else if (mNeedsToDecayWhenUpdating) {
- // TODO: Quit decaying probability during GC.
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
bigramEntry.getProbability(), mHeaderPolicy);
+ const HistoricalInfo historicalInfo =
+ ForgettingCurveUtils::createHistoricalInfoToSave(
+ bigramEntry.getHistoricalInfo());
+ // TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
const BigramEntry updatedBigramEntry =
- bigramEntry.updateProbabilityAndGetEntry(probability);
+ bigramEntry.updateProbabilityAndGetEntry(probability)
+ .updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
@@ -225,7 +229,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability);
const HistoricalInfo updatedHistoricalInfo =
- ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
+ ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 867cd7673..07554342f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -151,17 +151,22 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
- // TODO: Use historical info.
+ // TODO: Remove.
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
+ const HistoricalInfo historicalInfo =
+ ForgettingCurveUtils::createHistoricalInfoToSave(
+ originalProbabilityEntry.getHistoricalInfo());
const ProbabilityEntry probabilityEntry =
- originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability);
+ originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability)
+ .createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
return false;
}
+ // TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
if (!isValid) {
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
@@ -379,7 +384,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalProbabilityEntry->getProbability(), newProbability);
const HistoricalInfo updatedHistoricalInfo =
- ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
+ ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index c7fb47ef6..466af7256 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -42,10 +42,13 @@ const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3;
const int ForgettingCurveUtils::MAX_COUNT = 3;
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
+const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
+const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
+const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
-/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
+/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo,
const int newProbability, const int timestamp) {
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
@@ -110,6 +113,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}
+// Tells whether an entry carrying the given historical info should be kept.
+// An entry whose level is above 0 is always kept. Any other entry is kept only while the
+// number of time steps elapsed since its timestamp is below
+// DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD.
+/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
+    if (historicalInfo->getLevel() > 0) {
+        return true;
+    }
+    const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
+    return elapsedTimeStepCount < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
+}
+
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy) {
const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
@@ -129,6 +138,26 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return currentEncodedProbability;
}
+// Builds the historical info that should be stored for an entry when it is saved.
+//  - If the original timestamp is NOT_A_TIMESTAMP, a default-constructed HistoricalInfo is
+//    returned.
+//  - If fewer than MAX_ELAPSED_TIME_STEP_COUNT time steps have elapsed since the original
+//    timestamp, the original info is returned unchanged.
+//  - Otherwise the level is lowered by one for every MAX_ELAPSED_TIME_STEP_COUNT elapsed time
+//    steps (by at most the original level), the timestamp is advanced by the duration that
+//    corresponds to the applied level down, and the count is reset to 0.
+/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
+        const HistoricalInfo *const originalHistoricalInfo) {
+    const int originalTimestamp = originalHistoricalInfo->getTimeStamp();
+    if (originalTimestamp == NOT_A_TIMESTAMP) {
+        return HistoricalInfo();
+    }
+    const int elapsedTimeStepCount = getElapsedTimeStepCount(originalTimestamp);
+    if (elapsedTimeStepCount < MAX_ELAPSED_TIME_STEP_COUNT) {
+        // Not enough time has passed; the stored info is still up to date.
+        return *originalHistoricalInfo;
+    }
+    const int originalLevel = originalHistoricalInfo->getLevel();
+    // Cap the level down amount by the original level.
+    int levelDownAmount = elapsedTimeStepCount / MAX_ELAPSED_TIME_STEP_COUNT;
+    if (levelDownAmount >= originalLevel) {
+        levelDownAmount = originalLevel;
+    }
+    // Move the timestamp forward by the time consumed by the applied level down.
+    const int levelDownDurationInSeconds =
+            levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS;
+    return HistoricalInfo(originalTimestamp + levelDownDurationInSeconds,
+            originalLevel - levelDownAmount, 0 /* count */);
+}
+
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
const int unigramCount, const int bigramCount,
const DictionaryHeaderStructurePolicy *const headerPolicy) {
@@ -167,6 +196,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
}
+// Returns the time elapsed between the given timestamp and TimeKeeper::peekCurrentTime(),
+// expressed as a number of time steps (integer division by TIME_STEP_DURATION_IN_SECONDS).
+/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) {
+    const int elapsedSeconds = TimeKeeper::peekCurrentTime() - timestamp;
+    return elapsedSeconds / TIME_STEP_DURATION_IN_SECONDS;
+}
+
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
// Table entry is as follows:
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 3e5bdf6b2..76d172e0b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -36,18 +36,26 @@ class ForgettingCurveUtils {
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
- static const HistoricalInfo createUpdatedHistoricalInfoFrom(
+ static const HistoricalInfo createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
const int timestamp);
+ static const HistoricalInfo createHistoricalInfoToSave(
+ const HistoricalInfo *const originalHistoricalInfo);
+
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
+ // TODO: Remove.
static int getUpdatedEncodedProbability(const int originalEncodedProbability,
const int newProbability);
+ // TODO: Remove.
static int isValidEncodedProbability(const int encodedProbability);
+ static bool needsToKeep(const HistoricalInfo *const historicalInfo);
+
+ // TODO: Remove.
static int getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy);
@@ -84,12 +92,17 @@ class ForgettingCurveUtils {
static const int MAX_LEVEL;
static const int MAX_COUNT;
static const int MIN_VALID_LEVEL;
+ static const int TIME_STEP_DURATION_IN_SECONDS;
+ static const int MAX_ELAPSED_TIME_STEP_COUNT;
+ static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
static const ProbabilityTable sProbabilityTable;
static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability);
+
+ static int getElapsedTimeStepCount(const int timestamp);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */