aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp36
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h24
2 files changed, 42 insertions, 18 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 62a19a5a6..4ff31ba0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <cmath>
#include <stdlib.h>
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -35,15 +36,17 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// duration of the decay is approximately 66hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else if (encodedBigramProbability == NOT_A_PROBABILITY) {
- return backoff(decodeUnigramProbability(encodedUnigramProbability));
+ return backoff(decodeProbability(encodedUnigramProbability));
} else {
- const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
- const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
+ const int unigramProbability = decodeProbability(encodedUnigramProbability);
+ const int bigramProbability = decodeProbability(encodedBigramProbability);
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
@@ -88,21 +91,12 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
}
}
-/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
- const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
- if (probability < 0) {
- return NOT_A_PROBABILITY;
- } else {
- return min(probability, MAX_ENCODED_PROBABILITY) * 8;
- }
-}
-
-/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
- if (probability < 0) {
+ if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
return NOT_A_PROBABILITY;
} else {
- return min(probability, MAX_ENCODED_PROBABILITY) * 8;
+ return min(sProbabilityTable.getProbability(encodedProbability), MAX_COMPUTED_PROBABILITY);
}
}
@@ -115,4 +109,16 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
}
}
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
+ // Table entries are as follows:
+ // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+ // Note that the first MIN_VALID_ENCODED_PROBABILITY entries are not used.
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+ for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+ const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+ static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+ mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 281f76a9c..d666f22aa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
#define LATINIME_FORGETTING_CURVE_UTILS_H
+#include <vector>
+
#include "defines.h"
namespace latinime {
@@ -44,16 +46,32 @@ class ForgettingCurveUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
+ class ProbabilityTable {
+ public:
+ ProbabilityTable();
+
+ int getProbability(const int encodedProbability) const {
+ if (encodedProbability < 0 || encodedProbability >= static_cast<int>(mTable.size())) {
+ return NOT_A_PROBABILITY;
+ }
+ return mTable[encodedProbability];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+ std::vector<int> mTable;
+ };
+
static const int MAX_COMPUTED_PROBABILITY;
static const int MAX_ENCODED_PROBABILITY;
static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP;
-
static const float MIN_PROBABILITY_TO_DECAY;
- static int decodeUnigramProbability(const int encodedProbability);
+ static const ProbabilityTable sProbabilityTable;
- static int decodeBigramProbability(const int encodedProbability);
+ static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability);
};