aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp129
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp123
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h)57
7 files changed, 175 insertions, 171 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 67a085de3..8753c6eb0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -20,7 +20,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
}
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
- if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) {
+ if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
// This bigram is too weak to output.
*outBigramPos = NOT_A_DICT_POS;
} else {
@@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
bigramFlags);
const int probabilityToWrite = mIsDecayingDict ?
- DecayingUtils::getUpdatedBigramProbabilityDelta(
- originalProbability, probability) : probability;
+ ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+ probability) : probability;
const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
probabilityToWrite);
@@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con
int *const writingPos) {
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
const int probabilityToWrite = mIsDecayingDict ?
- DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
+ ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
probability;
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
probabilityToWrite, false /* hasNext */, writingPos);
@@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay(
*outRemoved = false;
if (mIsDecayingDict) {
// Update bigram probability for decaying.
- const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave(
+ const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
- if (DecayingUtils::isValidBigram(newProbability)) {
+ if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
// Write new probability.
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
BigramListReadWriteUtils::setProbabilityInFlags(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 081163a4d..324b53062 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
-#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners
bool isUselessPtNode = !node->isTerminal();
if (node->isTerminal() && mIsDecayingDict) {
const int newProbability =
- DecayingUtils::getUnigramProbabilityToSave(node->getProbability());
+ ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
int writingPos = node->getProbabilityFieldPos();
// Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
mBuffer, newProbability, &writingPos)) {
return false;
}
- if (!DecayingUtils::isValidUnigram(newProbability)) {
+ if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
isUselessPtNode = false;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index a5a42ead1..60d0db0c0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -28,7 +28,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
@@ -154,7 +154,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
if (mHeaderPolicy.isDecayingDict()) {
- return DecayingUtils::getProbability(unigramProbability, bigramProbability);
+ return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
} else {
if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
@@ -344,10 +344,10 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy.isDecayingDict()) {
- if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) {
+ if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
// Unigram count exceeds the limit.
return true;
- } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
+ } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
// Bigram count exceeds the limit.
return true;
} else if (mindsBlockByGC && needsToDecay()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index db6d48fdb..70a9ee564 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -25,8 +25,8 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "utils/hash_map_compat.h"
namespace latinime {
@@ -494,7 +494,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false;
}
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
- .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams.
}
@@ -507,7 +507,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
}
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
- > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
}
@@ -545,7 +545,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) {
if (mNeedsToDecay) {
- return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
+ return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
+ newProbability);
} else {
return newProbability;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp
deleted file mode 100644
index 942a74238..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
-
-#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
-
-namespace latinime {
-
-const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000;
-const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
-const int DecayingUtils::MAX_BIGRAM_COUNT = 12000;
-const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
-
-const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127;
-const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120;
-const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
-const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8;
-const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
-const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
-const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
-
-/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbabilityDelta) {
- if (encodedUnigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
- const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
- encodedUnigramProbability));
- return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
- } else {
- const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
- decodeUnigramProbability(encodedUnigramProbability),
- decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
- return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
- }
-}
-
-/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability,
- const int newProbability) {
- if (originalEncodedProbability == NOT_A_PROBABILITY) {
- // The unigram is not in this dictionary.
- if (newProbability == NOT_A_PROBABILITY) {
- // The unigram is not in other dictionaries.
- return 0;
- } else {
- return MIN_VALID_UNIGRAM_PROBABILITY;
- }
- } else {
- if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
- return MIN_VALID_UNIGRAM_PROBABILITY;
- }
- return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
- }
-}
-
-/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) {
- return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
-}
-
-/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) {
- return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
-}
-
-/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta(
- const int originalEncodedProbabilityDelta, const int newProbability) {
- if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
- // The bigram relation is not in this dictionary.
- if (newProbability == NOT_A_PROBABILITY) {
- // The bigram target is not in other dictionaries.
- return 0;
- } else {
- return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- }
- } else {
- if (newProbability != NOT_A_PROBABILITY
- && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
- return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- }
- return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
- MAX_BIGRAM_PROBABILITY_DELTA);
- }
-}
-
-/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) {
- return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
-}
-
-/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
- return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
-}
-
-/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) {
- const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
- if (probability < 0) {
- return NOT_A_PROBABILITY;
- } else {
- return min(probability, MAX_UNIGRAM_PROBABILITY);
- }
-}
-
-/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) {
- const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- if (probabilityDelta < 0) {
- return NOT_A_PROBABILITY;
- } else {
- return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
- }
-}
-
-/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) {
- return rawProbability;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
new file mode 100644
index 000000000..b502fe25d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cmath>
+#include <stdlib.h>
+
+#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000;
+const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
+const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
+const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
+
+const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
+const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
+const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
+const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
+// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
+// duration of the decay is approximately 66hours.
+const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
+/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
+ const int encodedBigramProbability) {
+ if (encodedUnigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
+ return backoff(decodeProbability(encodedUnigramProbability));
+ } else {
+ const int unigramProbability = decodeProbability(encodedUnigramProbability);
+ const int bigramProbability = decodeProbability(encodedBigramProbability);
+ return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
+ }
+}
+
+// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
+// (i.e. unigram probability + bigram probability delta).
+/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
+ const int originalEncodedProbability, const int newProbability) {
+ if (originalEncodedProbability == NOT_A_PROBABILITY) {
+ // The bigram relation is not in this dictionary.
+ if (newProbability == NOT_A_PROBABILITY) {
+ // The bigram target is not in other dictionaries.
+ return 0;
+ } else {
+ return MIN_VALID_ENCODED_PROBABILITY;
+ }
+ } else {
+ if (newProbability != NOT_A_PROBABILITY
+ && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
+ return MIN_VALID_ENCODED_PROBABILITY;
+ }
+ return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
+ }
+}
+
+/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
+ return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
+}
+
+/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
+ const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
+ // TODO: Implement the decay in more proper way.
+ const float currentRate = static_cast<float>(currentEncodedProbability)
+ / static_cast<float>(MAX_ENCODED_PROBABILITY);
+ const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
+ + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate);
+ const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
+ if (thresholdToDecay < randValue) {
+ return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0);
+ } else {
+ return currentEncodedProbability;
+ }
+}
+
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
+ if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else {
+ return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
+ }
+}
+
+// See comments in ProbabilityUtils::backoff().
+/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else {
+ return max(unigramProbability - 8, 0);
+ }
+}
+
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
+ // Table entry is as follows:
+ // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+ // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+ for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+ const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+ static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+ mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+ }
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 1ca03918f..d666f22aa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -14,8 +14,10 @@
* limitations under the License.
*/
-#ifndef LATINIME_DECAYING_UTILS_H
-#define LATINIME_DECAYING_UTILS_H
+#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
+#define LATINIME_FORGETTING_CURVE_UTILS_H
+
+#include <vector>
#include "defines.h"
@@ -24,8 +26,7 @@ namespace latinime {
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
-// TODO: Quit using bigram probability delta.
-class DecayingUtils {
+class ForgettingCurveUtils {
public:
static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
@@ -33,38 +34,46 @@ class DecayingUtils {
static const int MAX_BIGRAM_COUNT_AFTER_GC;
static int getProbability(const int encodedUnigramProbability,
- const int encodedBigramProbabilityDelta);
+ const int encodedBigramProbability);
- static int getUpdatedUnigramProbability(const int originalEncodedProbability,
+ static int getUpdatedEncodedProbability(const int originalEncodedProbability,
const int newProbability);
- static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
- const int newProbability);
+ static int isValidEncodedProbability(const int encodedProbability);
- static int isValidUnigram(const int encodedUnigramProbability);
+ static int getEncodedProbabilityToSave(const int encodedProbability);
- static int isValidBigram(const int encodedProbabilityDelta);
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
- static int getUnigramProbabilityToSave(const int encodedProbability);
+ class ProbabilityTable {
+ public:
+ ProbabilityTable();
- static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
+ int getProbability(const int encodedProbability) const {
+ if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
+ return NOT_A_PROBABILITY;
+ }
+ return mTable[encodedProbability];
+ }
- private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+ std::vector<int> mTable;
+ };
static const int MAX_COMPUTED_PROBABILITY;
- static const int MAX_UNIGRAM_PROBABILITY;
- static const int MIN_VALID_UNIGRAM_PROBABILITY;
- static const int UNIGRAM_PROBABILITY_STEP;
- static const int MAX_BIGRAM_PROBABILITY_DELTA;
- static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
- static const int BIGRAM_PROBABILITY_DELTA_STEP;
+ static const int MAX_ENCODED_PROBABILITY;
+ static const int MIN_VALID_ENCODED_PROBABILITY;
+ static const int ENCODED_PROBABILITY_STEP;
+ static const float MIN_PROBABILITY_TO_DECAY;
- static int decodeUnigramProbability(const int encodedProbability);
+ static const ProbabilityTable sProbabilityTable;
- static int decodeBigramProbabilityDelta(const int encodedProbability);
+ static int decodeProbability(const int encodedProbability);
- static int getDecayedProbability(const int rawProbability);
+ static int backoff(const int unigramProbability);
};
} // namespace latinime
-#endif /* LATINIME_DECAYING_UTILS_H */
+#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */