aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2013-12-09 07:22:29 +0000
committer Android (Google) Code Review <android-gerrit@google.com> 2013-12-09 07:22:29 +0000
commit 5ea2f2972ee3d04ecbf6a6cdad6d056a2d85bdfe (patch)
tree 909cb95111708c4d46ff0cdcb51cada6f2d8c069
parent 8317acad48e4894e55d75c2f117e127667c93d6c (diff)
parent 6c1382b4ae5274407b5431949ced9d09c1ab2535 (diff)
download latinime-5ea2f2972ee3d04ecbf6a6cdad6d056a2d85bdfe.tar.gz
latinime-5ea2f2972ee3d04ecbf6a6cdad6d056a2d85bdfe.tar.xz
latinime-5ea2f2972ee3d04ecbf6a6cdad6d056a2d85bdfe.zip
Merge "Start updating historical information."
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp 7
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 31
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h 9
5 files changed, 55 insertions, 9 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 48ddb2ff4..ad437b1a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -55,7 +55,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
}
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry,
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
newProbability, timestamp);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
@@ -81,7 +81,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
}
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&updatedBigramEntry, newProbability, timestamp);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
@@ -94,7 +94,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
// Write new entry at a head position of the bigram list.
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
- const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
+ const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, newProbability, timestamp);
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
return false;
@@ -218,14 +218,17 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
return invalidEntryPos;
}
-const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry(
+const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const {
if (mNeedsToDecayWhenUpdating) {
- // TODO: Update historical information.
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability);
- return originalBigramEntry->updateProbabilityAndGetEntry(probability);
+ const HistoricalInfo updatedHistoricalInfo =
+ ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
+ originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
+ return originalBigramEntry->updateProbabilityAndGetEntry(probability)
+ .updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index e718645b4..972144100 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -59,7 +59,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
- const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry,
+ const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const int newProbability, const int timestamp) const;
BigramDictContent *const mBigramDictContent;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 92bec30f5..95921c580 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -292,10 +292,13 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
const int timestamp) const {
if (mNeedsToDecayWhenUpdating) {
- // TODO: Update historical information.
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalProbabilityEntry->getProbability(), newProbability);
- return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability);
+ const HistoricalInfo updatedHistoricalInfo =
+ ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
+ originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
+ return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
+ .createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
} else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 3ce5680c9..c7fb47ef6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -39,8 +39,39 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
+const int ForgettingCurveUtils::MAX_LEVEL = 3;
+const int ForgettingCurveUtils::MAX_COUNT = 3;
+const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
+
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
+ const HistoricalInfo *const originalHistoricalInfo, // Source info; only read via its getters.
+ const int newProbability, const int timestamp) { // Every returned info carries this timestamp.
+ if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { // Checked first.
+ return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
+ } else if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
+ // The original has NOT_A_TIMESTAMP: create initial information (level 0, count 0).
+ return HistoricalInfo(timestamp, 0 /* level */, 0 /* count */);
+ } else {
+ const int updatedCount = originalHistoricalInfo->getCount() + 1; // Count this update.
+ if (updatedCount > MAX_COUNT) {
+ // The count exceeds MAX_COUNT, so the level can be incremented.
+ if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
+ // The level is already max; keep level and count, refreshing only the timestamp.
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(),
+ originalHistoricalInfo->getCount());
+ } else {
+ // Level up and restart the count from 0.
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1,
+ 0 /* count */);
+ }
+ } else {
+ return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount); // Same level.
+ }
+ }
+}
+
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index a858719ae..3e5bdf6b2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -20,6 +20,7 @@
#include <vector>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
@@ -35,6 +36,10 @@ class ForgettingCurveUtils {
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
+ static const HistoricalInfo createUpdatedHistoricalInfoFrom(
+ const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
+ const int timestamp);
+
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
@@ -76,6 +81,10 @@ class ForgettingCurveUtils {
static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS;
+ static const int MAX_LEVEL;
+ static const int MAX_COUNT;
+ static const int MIN_VALID_LEVEL;
+
static const ProbabilityTable sProbabilityTable;
static int decodeProbability(const int encodedProbability);