about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2013-10-09 17:55:05 -0700
committer Android Git Automerger <android-git-automerger@android.com> 2013-10-09 17:55:05 -0700
commit 3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1 (patch)
tree 10902546e27b06a78ce6d132ee9c4d529486387b
parent aa3ae615d29e8371f873ec870d10e20f67d7c33d (diff)
parent 28efd918dada66b2cfc0d2e78dbfe2d574785a92 (diff)
download latinime-3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1.tar.gz
latinime-3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1.tar.xz
latinime-3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1.zip
am 28efd918: am ba9dc086: Merge "Use probability table for decaying dictionaries."
* commit '28efd918dada66b2cfc0d2e78dbfe2d574785a92': Use probability table for decaying dictionaries.
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 36
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h 24
2 files changed, 42 insertions, 18 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 62a19a5a6..4ff31ba0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <cmath>
#include <stdlib.h>
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -35,15 +36,17 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// duration of the decay is approximately 66hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else if (encodedBigramProbability == NOT_A_PROBABILITY) {
- return backoff(decodeUnigramProbability(encodedUnigramProbability));
+ return backoff(decodeProbability(encodedUnigramProbability)); // No bigram value: apply backoff() to the decoded unigram.
} else {
- const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
- const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
+ const int unigramProbability = decodeProbability(encodedUnigramProbability);
+ const int bigramProbability = decodeProbability(encodedBigramProbability); // Unigrams and bigrams share one decoder.
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
@@ -88,21 +91,12 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
}
}
-/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
- const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
- if (probability < 0) {
- return NOT_A_PROBABILITY;
- } else {
- return min(probability, MAX_ENCODED_PROBABILITY) * 8;
- }
-}
-
-/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) { // Encoded value -> table value.
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
- if (probability < 0) {
+ if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) { // Below the valid encoded range: nothing to decode.
return NOT_A_PROBABILITY;
} else {
- return min(probability, MAX_ENCODED_PROBABILITY) * 8;
+ return min(sProbabilityTable.getProbability(encodedProbability), MAX_COMPUTED_PROBABILITY); // Cap in the decoded domain.
}
}
@@ -115,4 +109,16 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
}
}
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
+ // Fills mTable[i] with MAX_COMPUTED_PROBABILITY ^ (i / MAX_ENCODED_PROBABILITY), truncated to int, for every
+ // i in [0, MAX_ENCODED_PROBABILITY]. Table entries: 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+ // Note that the first MIN_VALID_ENCODED_PROBABILITY values are not used.
+ mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+ for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+ const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+ static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+ mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability)); // Clamp into [0, MAX_COMPUTED_PROBABILITY].
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 281f76a9c..d666f22aa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -17,6 +17,8 @@
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
#define LATINIME_FORGETTING_CURVE_UTILS_H
+#include <vector>
+
#include "defines.h"
namespace latinime {
@@ -44,16 +46,32 @@ class ForgettingCurveUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
+ class ProbabilityTable { // Lookup table indexed by encoded probability; entries are filled by the constructor.
+ public:
+ ProbabilityTable();
+ // Returns the table entry for encodedProbability, or NOT_A_PROBABILITY when it is not a valid index.
+ int getProbability(const int encodedProbability) const {
+ if (encodedProbability < 0 || encodedProbability >= static_cast<int>(mTable.size())) {
+ return NOT_A_PROBABILITY; // An index equal to mTable.size() is already one past the last entry.
+ }
+ return mTable[encodedProbability];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+ std::vector<int> mTable;
+ };
+
static const int MAX_COMPUTED_PROBABILITY;
static const int MAX_ENCODED_PROBABILITY;
static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP;
-
static const float MIN_PROBABILITY_TO_DECAY;
- static int decodeUnigramProbability(const int encodedProbability);
+ static const ProbabilityTable sProbabilityTable;
- static int decodeBigramProbability(const int encodedProbability);
+ static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability);
};