diff options
author | 2013-10-09 17:55:05 -0700 | |
---|---|---|
committer | 2013-10-09 17:55:05 -0700 | |
commit | 3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1 (patch) | |
tree | 10902546e27b06a78ce6d132ee9c4d529486387b | |
parent | aa3ae615d29e8371f873ec870d10e20f67d7c33d (diff) | |
parent | 28efd918dada66b2cfc0d2e78dbfe2d574785a92 (diff) | |
download | latinime-3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1.tar.gz latinime-3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1.tar.xz latinime-3cabfcef68e12b8c7f606dacb13ce2d57f08d2e1.zip |
am 28efd918: am ba9dc086: Merge "Use probability table for decaying dictionaries."
* commit '28efd918dada66b2cfc0d2e78dbfe2d574785a92':
Use probability table for decaying dictionaries.
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp | 36 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h | 24 |
2 files changed, 42 insertions, 18 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 62a19a5a6..4ff31ba0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cmath>
 #include <stdlib.h>
 
 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -35,15 +36,17 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
 // duration of the decay is approximately 66hours.
 const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
 
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
 /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
         const int encodedBigramProbability) {
     if (encodedUnigramProbability == NOT_A_PROBABILITY) {
         return NOT_A_PROBABILITY;
     } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
-        return backoff(decodeUnigramProbability(encodedUnigramProbability));
+        return backoff(decodeProbability(encodedUnigramProbability));
     } else {
-        const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
-        const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
+        const int unigramProbability = decodeProbability(encodedUnigramProbability);
+        const int bigramProbability = decodeProbability(encodedBigramProbability);
         return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
     }
 }
@@ -88,21 +91,12 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
     }
 }
 
-/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
-    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
-    if (probability < 0) {
-        return NOT_A_PROBABILITY;
-    } else {
-        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
-    }
-}
-
-/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
     const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
-    if (probability < 0) {
+    if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
         return NOT_A_PROBABILITY;
     } else {
-        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
+        return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
     }
 }
 
@@ -115,4 +109,16 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
     }
 }
 
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
+    // Table entry is as follows:
+    // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+    // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
+    mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+    for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+        const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+                static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+        mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+    }
+}
+
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 281f76a9c..d666f22aa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -17,6 +17,8 @@
 #ifndef LATINIME_FORGETTING_CURVE_UTILS_H
 #define LATINIME_FORGETTING_CURVE_UTILS_H
 
+#include <vector>
+
 #include "defines.h"
 
 namespace latinime {
@@ -44,16 +46,32 @@ class ForgettingCurveUtils {
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
 
+    class ProbabilityTable {
+     public:
+        ProbabilityTable();
+
+        int getProbability(const int encodedProbability) const {
+            if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
+                return NOT_A_PROBABILITY;
+            }
+            return mTable[encodedProbability];
+        }
+
+     private:
+        DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+        std::vector<int> mTable;
+    };
+
     static const int MAX_COMPUTED_PROBABILITY;
     static const int MAX_ENCODED_PROBABILITY;
     static const int MIN_VALID_ENCODED_PROBABILITY;
     static const int ENCODED_PROBABILITY_STEP;
-
     static const float MIN_PROBABILITY_TO_DECAY;
 
-    static int decodeUnigramProbability(const int encodedProbability);
+    static const ProbabilityTable sProbabilityTable;
 
-    static int decodeBigramProbability(const int encodedProbability);
+    static int decodeProbability(const int encodedProbability);
 
     static int backoff(const int unigramProbability);
 };