aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/res/drawable-hdpi/sym_keyboard_settings_holo_dark.pngbin889 -> 787 bytes
-rw-r--r--java/res/drawable-mdpi/sym_keyboard_settings_holo_dark.pngbin708 -> 585 bytes
-rw-r--r--java/res/drawable-xhdpi/sym_keyboard_settings_holo_dark.pngbin1309 -> 1062 bytes
-rw-r--r--java/res/drawable-xxhdpi/sym_keyboard_settings_holo_dark.pngbin1471 -> 1455 bytes
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp55
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h23
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java4
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java114
17 files changed, 173 insertions, 113 deletions
diff --git a/java/res/drawable-hdpi/sym_keyboard_settings_holo_dark.png b/java/res/drawable-hdpi/sym_keyboard_settings_holo_dark.png
index c76008ab3..5af09ad8c 100644
--- a/java/res/drawable-hdpi/sym_keyboard_settings_holo_dark.png
+++ b/java/res/drawable-hdpi/sym_keyboard_settings_holo_dark.png
Binary files differ
diff --git a/java/res/drawable-mdpi/sym_keyboard_settings_holo_dark.png b/java/res/drawable-mdpi/sym_keyboard_settings_holo_dark.png
index a76a976c5..36c8c9623 100644
--- a/java/res/drawable-mdpi/sym_keyboard_settings_holo_dark.png
+++ b/java/res/drawable-mdpi/sym_keyboard_settings_holo_dark.png
Binary files differ
diff --git a/java/res/drawable-xhdpi/sym_keyboard_settings_holo_dark.png b/java/res/drawable-xhdpi/sym_keyboard_settings_holo_dark.png
index 05eaffe2e..99ee97dbf 100644
--- a/java/res/drawable-xhdpi/sym_keyboard_settings_holo_dark.png
+++ b/java/res/drawable-xhdpi/sym_keyboard_settings_holo_dark.png
Binary files differ
diff --git a/java/res/drawable-xxhdpi/sym_keyboard_settings_holo_dark.png b/java/res/drawable-xxhdpi/sym_keyboard_settings_holo_dark.png
index e4358463b..7041bb6ce 100644
--- a/java/res/drawable-xxhdpi/sym_keyboard_settings_holo_dark.png
+++ b/java/res/drawable-xxhdpi/sym_keyboard_settings_holo_dark.png
Binary files differ
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index a6829b476..5492c6070 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -37,6 +37,8 @@ class DictionaryHeaderStructurePolicy {
virtual float getMultiWordCostMultiplier() const = 0;
+ virtual int getLastDecayedTime() const = 0;
+
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 8753c6eb0..b1170e251 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -360,13 +360,13 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
}
bool DynamicBigramListPolicy::updateProbabilityForDecay(
- BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
+ const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
int *const bigramEntryPos, bool *const outRemoved) const {
*outRemoved = false;
if (mIsDecayingDict) {
// Update bigram probability for decaying.
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
// Write new probability.
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index b358b4ed5..0504b59d5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -27,6 +27,7 @@
namespace latinime {
class BufferWithExtendableBuffer;
+class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
@@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy;
*/
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
+ DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const buffer,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const bool isDecayingDict)
- : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}
+ : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
+ mIsDecayingDict(isDecayingDict) {}
~DynamicBigramListPolicy() {}
@@ -74,6 +77,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
const bool mIsDecayingDict;
@@ -81,7 +85,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
- bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
+ bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 324b53062..a17a0acf6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -29,7 +30,8 @@ bool DynamicPatriciaTrieGcEventListeners
bool isUselessPtNode = !node->isTerminal();
if (node->isTerminal() && mIsDecayingDict) {
const int newProbability =
- ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
+ ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
+ mHeaderPolicy);
int writingPos = node->getProbabilityFieldPos();
// Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 463715af5..3ca2f2a01 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -29,6 +29,8 @@
namespace latinime {
+class DictionaryHeaderStructurePolicy;
+
class DynamicPatriciaTrieGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
@@ -38,10 +40,12 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
- : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict),
- mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {}
+ : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
+ mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
+ mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -72,9 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
- const int mIsDecayingDict;
+ const bool mIsDecayingDict;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
@@ -85,7 +90,8 @@ class DynamicPatriciaTrieGcEventListeners {
class TraversePolicyToUpdateBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
+ TraversePolicyToUpdateBigramProbability(
+ DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 60d0db0c0..31e3fb42f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -42,7 +42,6 @@ const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUER
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@@ -314,15 +313,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
- const bool runGCwithDecay = needsToDecay();
- DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer,
- &mShortcutListPolicy, runGCwithDecay);
+ const bool needsToDecay = mHeaderPolicy.isDecayingDict()
+ && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
+ DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
+ &mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay);
+ &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
- if (runGCwithDecay) {
- mNeedsToDecayForTesting = false;
- }
+ mNeedsToDecayForTesting = false;
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -344,16 +343,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy.isDecayingDict()) {
- if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
- // Unigram count exceeds the limit.
- return true;
- } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
- // Bigram count exceeds the limit.
- return true;
- } else if (mindsBlockByGC && needsToDecay()) {
- // Time to update probabilities for decaying.
- return true;
- }
+ return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
}
return false;
}
@@ -369,9 +360,4 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
}
}
-bool DynamicPatriciaTriePolicy::needsToDecay() const {
- return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting
- || mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0));
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index c3bbe9977..903f65e8e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -37,7 +37,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
- mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
+ mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
@@ -105,7 +105,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
- static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
@@ -115,8 +114,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mUnigramCount;
int mBigramCount;
int mNeedsToDecayForTesting;
-
- bool needsToDecay() const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 70a9ee564..067c8ec98 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -165,7 +165,10 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
MAX_DICTIONARY_SIZE);
int unigramCount = 0;
int bigramCount = 0;
- if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) {
+ if (mNeedsToDecay) {
+ ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
+ }
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return;
}
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
@@ -481,14 +484,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
- int *const outBigramCount) {
+ const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- this, mBuffer, mNeedsToDecay);
+ headerPolicy, this, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
@@ -505,7 +508,6 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) {
return false;
}
-
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
@@ -524,7 +526,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
+ DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index 0caf29120..ca8664729 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -128,8 +128,9 @@ class DynamicPatriciaTrieWritingHelper {
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
- bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
- int *const outUnigramCount, int *const outBigramCount);
+ bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
+ int *const outBigramCount);
int getUpdatedProbability(const int originalProbability, const int newProbability);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index b502fe25d..19ca35481 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -15,10 +15,12 @@
*/
#include <cmath>
+#include <ctime>
#include <stdlib.h>
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
@@ -35,8 +37,14 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
+
+void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
+ mCurrentTime = time(0);
+}
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) {
@@ -76,19 +84,44 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}
-/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
- const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
+/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
+ const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
+ int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
// TODO: Implement the decay in more proper way.
- const float currentRate = static_cast<float>(currentEncodedProbability)
- / static_cast<float>(MAX_ENCODED_PROBABILITY);
- const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
- + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate);
- const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
- if (thresholdToDecay < randValue) {
- return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0);
- } else {
- return currentEncodedProbability;
+ for (int i = 0; i < decayIterationCount; ++i) {
+ const float currentRate = static_cast<float>(currentEncodedProbability)
+ / static_cast<float>(MAX_ENCODED_PROBABILITY);
+ const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
+ + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
+ const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
+ if (thresholdToDecay < randValue) {
+ currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
+ 0);
+ }
+ }
+ return currentEncodedProbability;
+}
+
+/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
+ const int unigramCount, const int bigramCount,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
+ // Unigram count exceeds the limit.
+ return true;
+ } else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
+ // Bigram count exceeds the limit.
+ return true;
+ }
+ if (mindsBlockByDecay) {
+ return false;
+ }
+ if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
+ // Time to decay.
+ return true;
}
+ return false;
}
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index d666f22aa..2ad423874 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -23,16 +23,32 @@
namespace latinime {
+class DictionaryHeaderStructurePolicy;
+
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
+ class TimeKeeper {
+ public:
+ TimeKeeper() : mCurrentTime(0) {}
+ void setCurrentTime();
+ int peekCurrentTime() const { return mCurrentTime; };
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
+
+ int mCurrentTime;
+ };
+
static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
+ static TimeKeeper sTimeKeeper;
+
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
@@ -41,7 +57,11 @@ class ForgettingCurveUtils {
static int isValidEncodedProbability(const int encodedProbability);
- static int getEncodedProbabilityToSave(const int encodedProbability);
+ static int getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
+
+ static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
+ const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
@@ -68,6 +88,7 @@ class ForgettingCurveUtils {
static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP;
static const float MIN_PROBABILITY_TO_DECAY;
+ static const int DECAY_INTERVAL_SECONDS;
static const ProbabilityTable sProbabilityTable;
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index b2d31c21f..ded8eaa97 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -50,8 +50,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
}
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
- // Entries having low probability would be suppressed once in 2 GCs.
- final int count = 2;
+ // Entries having low probability would be suppressed once in 3 GCs.
+ final int count = 3;
for (int i = 0; i < count; i++) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 6a21522f9..5b8f0e977 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
+import android.text.TextUtils;
import android.util.Pair;
import com.android.inputmethod.latin.makedict.CodePointUtils;
@@ -126,7 +127,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
public void testRandomlyAddUnigramWord() {
final int wordCount = 1000;
final int codePointSetSize = 50;
- final int seed = 123456789;
+ final long seed = System.currentTimeMillis();
File dictFile = null;
try {
@@ -223,7 +224,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 50;
- final int seed = 11111;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
File dictFile = null;
try {
@@ -234,43 +236,42 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
final ArrayList<String> words = new ArrayList<String>();
- // Test a word that isn't contained within the dictionary.
- final Random random = new Random(seed);
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final int[] unigramProbabilities = new int[wordCount];
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+
for (int i = 0; i < wordCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
- unigramProbabilities[i] = unigramProbability;
+ unigramProbabilities.put(word, unigramProbability);
binaryDictionary.addUnigramWord(word, unigramProbability);
}
- final int[][] probabilities = new int[wordCount][wordCount];
-
- for (int i = 0; i < wordCount; ++i) {
- for (int j = 0; j < wordCount; ++j) {
- probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
- }
- }
-
for (int i = 0; i < bigramCount; i++) {
- final int word0Index = random.nextInt(wordCount);
- final int word1Index = random.nextInt(wordCount);
- final String word0 = words.get(word0Index);
- final String word1 = words.get(word1Index);
+ final String word0 = words.get(random.nextInt(wordCount));
+ final String word1 = words.get(random.nextInt(wordCount));
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF);
- probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
- unigramProbabilities[word1Index], bigramProbability);
+ bigramProbabilities.put(bigram, bigramProbability);
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
}
- for (int i = 0; i < words.size(); i++) {
- for (int j = 0; j < words.size(); j++) {
- assertEquals(probabilities[i][j],
- binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
- }
+ for (final Pair<String, String> bigram : bigramWords) {
+ final int unigramProbability = unigramProbabilities.get(bigram.second);
+ final int bigramProbability = bigramProbabilities.get(bigram);
+ final int probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ assertEquals(probability,
+ binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
dictFile.delete();
@@ -419,8 +420,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 30;
- // TODO: Use various seeds such as a current timestamp to make this test more random.
- final int seed = 314159265;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
File dictFile = null;
try {
@@ -432,35 +433,32 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
final ArrayList<String> words = new ArrayList<String>();
- // Test a word that isn't contained within the dictionary.
- final Random random = new Random(seed);
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final int[] unigramProbabilities = new int[wordCount];
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+
for (int i = 0; i < wordCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
- unigramProbabilities[i] = unigramProbability;
+ unigramProbabilities.put(word, unigramProbability);
binaryDictionary.addUnigramWord(word, unigramProbability);
}
- final int[][] probabilities = new int[wordCount][wordCount];
-
- for (int i = 0; i < wordCount; ++i) {
- for (int j = 0; j < wordCount; ++j) {
- probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
- }
- }
-
for (int i = 0; i < bigramCount; i++) {
- final int word0Index = random.nextInt(wordCount);
- final int word1Index = random.nextInt(wordCount);
- final String word0 = words.get(word0Index);
- final String word1 = words.get(word1Index);
+ final String word0 = words.get(random.nextInt(wordCount));
+ final String word1 = words.get(random.nextInt(wordCount));
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF);
- probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
- unigramProbabilities[word1Index], bigramProbability);
+ bigramProbabilities.put(bigram, bigramProbability);
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
}
@@ -470,12 +468,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- for (int i = 0; i < words.size(); i++) {
- for (int j = 0; j < words.size(); j++) {
- assertEquals(probabilities[i][j],
- binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
- }
+ for (final Pair<String, String> bigram : bigramWords) {
+ final int unigramProbability = unigramProbabilities.get(bigram.second);
+ final int bigramProbability = bigramProbabilities.get(bigram);
+ final int probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ assertEquals(probability,
+ binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
+
dictFile.delete();
}
@@ -487,8 +488,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final float addBigramProb = 0.8f;
final float removeBigramProb = 0.2f;
final int codePointSetSize = 30;
- final int seed = 141421356;
+ final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
File dictFile = null;
@@ -539,6 +540,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index);
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
final int bigramProbability = random.nextInt(0xF);
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
bigramWords.add(bigram);
@@ -586,8 +590,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
public void testAddManyUnigramsAndFlushWithGC() {
final int flashWithGCIterationCount = 3;
final int codePointSetSize = 50;
- final int seed = 22360679;
+ final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
File dictFile = null;
@@ -632,8 +636,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int codePointSetSize = 50;
final int unigramCountPerIteration = 1000;
final int bigramCountPerIteration = 2000;
- final int seed = 1123581321;
-
+ final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
File dictFile = null;
@@ -661,6 +664,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < bigramCountPerIteration; j++) {
final String word0 = words.get(random.nextInt(words.size()));
final String word1 = words.get(random.nextInt(words.size()));
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
bigrams.add(new Pair<String, String>(word0, word1));
final int bigramProbability = random.nextInt(0xF);
binaryDictionary.addBigramWords(word0, word1, bigramProbability);