diff options
Diffstat (limited to 'native/jni/src')
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp | 14 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp | 6 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp | 8 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp | 9 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp | 129 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp | 123 | ||||
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h) | 57 |
7 files changed, 175 insertions, 171 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 67a085de3..8753c6eb0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -20,7 +20,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const } *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); - if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) { + if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) { // This bigram is too weak to output. *outBigramPos = NOT_A_DICT_POS; } else { @@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( bigramFlags); const int probabilityToWrite = mIsDecayingDict ? - DecayingUtils::getUpdatedBigramProbabilityDelta( - originalProbability, probability) : probability; + ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + probability) : probability; const BigramListReadWriteUtils::BigramFlags updatedFlags = BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probabilityToWrite); @@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con int *const writingPos) { // hasNext is false because we are adding a new bigram entry at the end of the bigram list. const int probabilityToWrite = mIsDecayingDict ? - DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : + ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) : probability; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, probabilityToWrite, false /* hasNext */, writingPos); @@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay( *outRemoved = false; if (mIsDecayingDict) { // Update bigram probability for decaying. - const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave( + const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); - if (DecayingUtils::isValidBigram(newProbability)) { + if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { // Write new probability. const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = BigramListReadWriteUtils::setProbabilityInFlags( diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index 081163a4d..324b53062 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners bool isUselessPtNode = !node->isTerminal(); if (node->isTerminal() && mIsDecayingDict) { const int newProbability = - DecayingUtils::getUnigramProbabilityToSave(node->getProbability()); + ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability()); int writingPos = node->getProbabilityFieldPos(); // Update probability. if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( mBuffer, newProbability, &writingPos)) { return false; } - if (!DecayingUtils::isValidUnigram(newProbability)) { + if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { isUselessPtNode = false; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index a5a42ead1..60d0db0c0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -28,7 +28,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { @@ -154,7 +154,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { if (mHeaderPolicy.isDecayingDict()) { - return DecayingUtils::getProbability(unigramProbability, bigramProbability); + return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); } else { if (unigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; @@ -344,10 +344,10 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { // Needs to reduce dictionary size. return true; } else if (mHeaderPolicy.isDecayingDict()) { - if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) { + if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { // Unigram count exceeds the limit. return true; - } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) { + } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) { // Bigram count exceeds the limit. return true; } else if (mindsBlockByGC && needsToDecay()) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index db6d48fdb..70a9ee564 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -25,8 +25,8 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "utils/hash_map_compat.h" namespace latinime { @@ -494,7 +494,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { + .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { // TODO: Remove more unigrams. } @@ -507,7 +507,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, } if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() - > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) { + > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { // TODO: Remove more bigrams. } @@ -545,7 +545,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, const int newProbability) { if (mNeedsToDecay) { - return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability); + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); } else { return newProbability; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp deleted file mode 100644 index 942a74238..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" - -#include "suggest/policyimpl/dictionary/utils/probability_utils.h" - -namespace latinime { - -const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000; -const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; -const int DecayingUtils::MAX_BIGRAM_COUNT = 12000; -const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; - -const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127; -const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120; -const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; -const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8; -const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; -const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; -const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; - -/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability, - const int encodedBigramProbabilityDelta) { - if (encodedUnigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) { - const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability( - encodedUnigramProbability)); - return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); - } else { - const int rawProbability = ProbabilityUtils::computeProbabilityForBigram( - decodeUnigramProbability(encodedUnigramProbability), - decodeBigramProbabilityDelta(encodedBigramProbabilityDelta)); - return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); - } -} - -/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability, - const int newProbability) { - if (originalEncodedProbability == NOT_A_PROBABILITY) { - // The unigram is not in this dictionary. - if (newProbability == NOT_A_PROBABILITY) { - // The unigram is not in other dictionaries. - return 0; - } else { - return MIN_VALID_UNIGRAM_PROBABILITY; - } - } else { - if (newProbability != NOT_A_PROBABILITY - && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) { - return MIN_VALID_UNIGRAM_PROBABILITY; - } - return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY); - } -} - -/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) { - return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); -} - -/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) { - return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); -} - -/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta( - const int originalEncodedProbabilityDelta, const int newProbability) { - if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { - // The bigram relation is not in this dictionary. - if (newProbability == NOT_A_PROBABILITY) { - // The bigram target is not in other dictionaries. - return 0; - } else { - return MIN_VALID_BIGRAM_PROBABILITY_DELTA; - } - } else { - if (newProbability != NOT_A_PROBABILITY - && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) { - return MIN_VALID_BIGRAM_PROBABILITY_DELTA; - } - return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP, - MAX_BIGRAM_PROBABILITY_DELTA); - } -} - -/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) { - return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; -} - -/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) { - return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; -} - -/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) { - const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; - if (probability < 0) { - return NOT_A_PROBABILITY; - } else { - return min(probability, MAX_UNIGRAM_PROBABILITY); - } -} - -/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) { - const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; - if (probabilityDelta < 0) { - return NOT_A_PROBABILITY; - } else { - return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA); - } -} - -/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) { - return rawProbability; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp new file mode 100644 index 000000000..b502fe25d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cmath> +#include <stdlib.h> + +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + +namespace latinime { + +const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000; +const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; +const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; +const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; + +const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; +const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15; +const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3; +const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1; +// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected +// duration of the decay is approximately 66hours. +const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; + +const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; + +/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, + const int encodedBigramProbability) { + if (encodedUnigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (encodedBigramProbability == NOT_A_PROBABILITY) { + return backoff(decodeProbability(encodedUnigramProbability)); + } else { + const int unigramProbability = decodeProbability(encodedUnigramProbability); + const int bigramProbability = decodeProbability(encodedBigramProbability); + return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); + } +} + +// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding +// (i.e. unigram probability + bigram probability delta). +/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability( + const int originalEncodedProbability, const int newProbability) { + if (originalEncodedProbability == NOT_A_PROBABILITY) { + // The bigram relation is not in this dictionary. + if (newProbability == NOT_A_PROBABILITY) { + // The bigram target is not in other dictionaries. + return 0; + } else { + return MIN_VALID_ENCODED_PROBABILITY; + } + } else { + if (newProbability != NOT_A_PROBABILITY + && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) { + return MIN_VALID_ENCODED_PROBABILITY; + } + return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY); + } +} + +/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) { + return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; +} + +/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) { + const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); + // TODO: Implement the decay in more proper way. + const float currentRate = static_cast<float>(currentEncodedProbability) + / static_cast<float>(MAX_ENCODED_PROBABILITY); + const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY + + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate); + const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); + if (thresholdToDecay < randValue) { + return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0); + } else { + return currentEncodedProbability; + } +} + +/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) { + if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) { + return NOT_A_PROBABILITY; + } else { + return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY); + } +} + +// See comments in ProbabilityUtils::backoff(). +/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else { + return max(unigramProbability - 8, 0); + } +} + +ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() { + // Table entry is as follows: + // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127. + // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used. + mTable.resize(MAX_ENCODED_PROBABILITY + 1); + for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) { + const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY), + static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY))); + mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability)); + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 1ca03918f..d666f22aa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -14,8 +14,10 @@ * limitations under the License. */ -#ifndef LATINIME_DECAYING_UTILS_H -#define LATINIME_DECAYING_UTILS_H +#ifndef LATINIME_FORGETTING_CURVE_UTILS_H +#define LATINIME_FORGETTING_CURVE_UTILS_H + +#include <vector> #include "defines.h" @@ -24,8 +26,7 @@ namespace latinime { // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is // required to introduced to each terminal PtNode and bigram entry. // TODO: Quit using bigram probability to indicate the delta. -// TODO: Quit using bigram probability delta. -class DecayingUtils { +class ForgettingCurveUtils { public: static const int MAX_UNIGRAM_COUNT; static const int MAX_UNIGRAM_COUNT_AFTER_GC; @@ -33,38 +34,46 @@ class DecayingUtils { static const int MAX_BIGRAM_COUNT_AFTER_GC; static int getProbability(const int encodedUnigramProbability, - const int encodedBigramProbabilityDelta); + const int encodedBigramProbability); - static int getUpdatedUnigramProbability(const int originalEncodedProbability, + static int getUpdatedEncodedProbability(const int originalEncodedProbability, const int newProbability); - static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta, - const int newProbability); + static int isValidEncodedProbability(const int encodedProbability); - static int isValidUnigram(const int encodedUnigramProbability); + static int getEncodedProbabilityToSave(const int encodedProbability); - static int isValidBigram(const int encodedProbabilityDelta); + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); - static int getUnigramProbabilityToSave(const int encodedProbability); + class ProbabilityTable { + public: + ProbabilityTable(); - static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); + int getProbability(const int encodedProbability) const { + if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) { + return NOT_A_PROBABILITY; + } + return mTable[encodedProbability]; + } - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils); + private: + DISALLOW_COPY_AND_ASSIGN(ProbabilityTable); + + std::vector<int> mTable; + }; static const int MAX_COMPUTED_PROBABILITY; - static const int MAX_UNIGRAM_PROBABILITY; - static const int MIN_VALID_UNIGRAM_PROBABILITY; - static const int UNIGRAM_PROBABILITY_STEP; - static const int MAX_BIGRAM_PROBABILITY_DELTA; - static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA; - static const int BIGRAM_PROBABILITY_DELTA_STEP; + static const int MAX_ENCODED_PROBABILITY; + static const int MIN_VALID_ENCODED_PROBABILITY; + static const int ENCODED_PROBABILITY_STEP; + static const float MIN_PROBABILITY_TO_DECAY; - static int decodeUnigramProbability(const int encodedProbability); + static const ProbabilityTable sProbabilityTable; - static int decodeBigramProbabilityDelta(const int encodedProbability); + static int decodeProbability(const int encodedProbability); - static int getDecayedProbability(const int rawProbability); + static int backoff(const int unigramProbability); }; } // namespace latinime -#endif /* LATINIME_DECAYING_UTILS_H */ +#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */ |