diff options
Diffstat (limited to 'native/jni/src')
24 files changed, 215 insertions, 225 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index e55c9eb8a..885118524 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -119,7 +119,7 @@ static inline void dumpWordInfo(const int *word, const int length, const int ran const int probability) { static char charBuf[50]; const int N = intArrayToCharArray(word, length, charBuf, NELEMS(charBuf)); - if (N > 1) { + if (N > 0) { AKLOGI("%2d [ %s ] (%d)", rank, charBuf, probability); } } diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 8d3f8a9f8..697e99ffb 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -23,7 +23,7 @@ #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/result/suggestion_results.h" #include "suggest/core/session/dic_traverse_session.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" @@ -46,25 +46,22 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, - int inputSize, const PrevWordsInfo *const prevWordsInfo, + int inputSize, const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - traverseSession->init(this, prevWordsInfo, suggestOptions); + traverseSession->init(this, ngramContext, suggestOptions); const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest; suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, ycoordinates, times, pointerIds, inputCodePoints, inputSize, weightOfLangModelVsSpatialModel, outSuggestionResults); - if (DEBUG_DICT) { - outSuggestionResults->dumpSuggestions(); - } } Dictionary::NgramListenerForPrediction::NgramListenerForPrediction( - const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds, + const NgramContext *const ngramContext, const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) - : mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds), + : mNgramContext(ngramContext), mPrevWordIds(prevWordIds), mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {} void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability, @@ -72,7 +69,7 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi if (targetWordId == NOT_A_WORD_ID) { return; } - if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) + if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */) && ngramProbability == NOT_A_PROBABILITY) { return; } @@ -88,20 +85,20 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi wordAttributes.getProbability()); } -void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, +void Dictionary::getPredictions(const NgramContext *const ngramContext, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds( + const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds( mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray, true /* tryLowerCaseSearch */); - NgramListenerForPrediction listener(prevWordsInfo, prevWordIds, outSuggestionResults, + NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults, mDictionaryStructureWithBufferPolicy.get()); mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener); } int Dictionary::getProbability(const CodePointArrayView codePoints) const { - return getNgramProbability(nullptr /* prevWordsInfo */, codePoints); + return getNgramProbability(nullptr /* ngramContext */, codePoints); } int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const { @@ -110,18 +107,18 @@ int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoi mDictionaryStructureWithBufferPolicy.get(), codePoints); } -int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, +int Dictionary::getNgramProbability(const NgramContext *const ngramContext, const CodePointArrayView codePoints) const { TimeKeeper::setCurrentTime(); const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints, false /* forceLowerCaseSearch */); if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY; - if (!prevWordsInfo) { + if (!ngramContext) { return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId); } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds - (mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray, + const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds( + mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray, true /* tryLowerCaseSearch */); return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId); } @@ -143,24 +140,24 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) { return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints); } -bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Dictionary::addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty) { TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty); + return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty); } -bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView codePoints) { TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints); + return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints); } -bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo, +bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, const CodePointArrayView codePoints, const bool isValidWord, const HistoricalInfo historicalInfo) { TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints, - isValidWord, historicalInfo); + return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext, + codePoints, isValidWord, historicalInfo); } bool Dictionary::flush(const char *const filePath) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index a58dbfbd7..843aec473 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -33,7 +33,7 @@ namespace latinime { class DictionaryStructureWithBufferPolicy; class DicTraverseSession; -class PrevWordsInfo; +class NgramContext; class ProximityInfo; class SuggestionResults; class SuggestOptions; @@ -66,18 +66,18 @@ class Dictionary { void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, - int inputSize, const PrevWordsInfo *const prevWordsInfo, + int inputSize, const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel, SuggestionResults *const outSuggestionResults) const; - void getPredictions(const PrevWordsInfo *const prevWordsInfo, + void getPredictions(const NgramContext *const ngramContext, SuggestionResults *const outSuggestionResults) const; int getProbability(const CodePointArrayView codePoints) const; int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const; - int getNgramProbability(const PrevWordsInfo *const prevWordsInfo, + int getNgramProbability(const NgramContext *const ngramContext, const CodePointArrayView codePoints) const; bool addUnigramEntry(const CodePointArrayView codePoints, @@ -85,13 +85,13 @@ class Dictionary { bool removeUnigramEntry(const CodePointArrayView codePoints); - bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty); - bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView codePoints); - bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, const CodePointArrayView codePoints, const bool isValidWord, const HistoricalInfo historicalInfo); @@ -123,7 +123,7 @@ class Dictionary { class NgramListenerForPrediction : public NgramListener { public: - NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo, + NgramListenerForPrediction(const NgramContext *const ngramContext, const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy); virtual void onVisitEntry(const int ngramProbability, const int targetWordId); @@ -131,7 +131,7 @@ class Dictionary { private: DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction); - const PrevWordsInfo *const mPrevWordsInfo; + const NgramContext *const mNgramContext; const WordIdArrayView mPrevWordIds; SuggestionResults *const mSuggestionResults; const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy; diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp index b85f3622a..9573c37bc 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp @@ -21,7 +21,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/int_array_view.h" @@ -33,10 +33,10 @@ namespace latinime { std::vector<DicNode> current; std::vector<DicNode> next; - // No prev words information. - PrevWordsInfo emptyPrevWordsInfo; + // No ngram context. + NgramContext emptyNgramContext; WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = emptyPrevWordsInfo.getPrevWordIds( + const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds( dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */); current.emplace_back(); DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, ¤t.front()); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 6624b7921..ceda5c03f 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -33,7 +33,7 @@ class DicNodeVector; class DictionaryHeaderStructurePolicy; class MultiBigramMap; class NgramListener; -class PrevWordsInfo; +class NgramContext; class UnigramProperty; /* @@ -81,15 +81,15 @@ class DictionaryStructureWithBufferPolicy { virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0; // Returns whether the update was success or not. - virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, + virtual bool addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty) = 0; // Returns whether the update was success or not. - virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + virtual bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints) = 0; // Returns whether the update was success or not. - virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + virtual bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints, const bool isValidWord, const HistoricalInfo historicalInfo) = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index b4d01d0f0..52dc2f86c 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -20,7 +20,7 @@ #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" namespace latinime { @@ -30,12 +30,12 @@ const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_S 256 * 1024; void DicTraverseSession::init(const Dictionary *const dictionary, - const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) { + const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; - mPrevWordIdCount = prevWordsInfo->getPrevWordIds(getDictionaryStructurePolicy(), + mPrevWordIdCount = ngramContext->getPrevWordIds(getDictionaryStructurePolicy(), &mPrevWordIdArray, true /* tryLowerCaseSearch */).size(); } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 9f841aa3c..bc53167f0 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -30,7 +30,7 @@ namespace latinime { class Dictionary; class DictionaryStructureWithBufferPolicy; -class PrevWordsInfo; +class NgramContext; class ProximityInfo; class SuggestOptions; @@ -61,7 +61,7 @@ class DicTraverseSession { // Non virtual inline destructor -- never inherit this class AK_FORCE_INLINE ~DicTraverseSession() {} - void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo, + void init(const Dictionary *dictionary, const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions); // TODO: Remove and merge into init void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints, diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/ngram_context.h index 553d5ad07..64c71410f 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/ngram_context.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LATINIME_PREV_WORDS_INFO_H -#define LATINIME_PREV_WORDS_INFO_H +#ifndef LATINIME_NGRAM_CONTEXT_H +#define LATINIME_NGRAM_CONTEXT_H #include <array> @@ -26,25 +26,26 @@ namespace latinime { -class PrevWordsInfo { +// Rename to NgramContext. +class NgramContext { public: // No prev word information. - PrevWordsInfo() : mPrevWordCount(0) { + NgramContext() : mPrevWordCount(0) { clear(); } - PrevWordsInfo(const PrevWordsInfo &prevWordsInfo) - : mPrevWordCount(prevWordsInfo.mPrevWordCount) { + NgramContext(const NgramContext &ngramContext) + : mPrevWordCount(ngramContext.mPrevWordCount) { for (size_t i = 0; i < mPrevWordCount; ++i) { - mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i]; - memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i], + mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i]; + memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i], sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]); - mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i]; + mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i]; } } // Construct from previous words. - PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH], + NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH], const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, const size_t prevWordCount) : mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) { @@ -61,7 +62,7 @@ class PrevWordsInfo { } // Construct from a previous word. - PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount, + NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount, const bool isBeginningOfSentence) : mPrevWordCount(1) { clear(); if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) { @@ -78,8 +79,8 @@ class PrevWordsInfo { } // TODO: Remove. - const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const { - return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence, + const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const { + return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence, std::min(mPrevWordCount, maxPrevWordCount)); } @@ -122,7 +123,7 @@ class PrevWordsInfo { } private: - DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo); + DISALLOW_ASSIGNMENT_OPERATOR(NgramContext); static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, @@ -165,4 +166,4 @@ class PrevWordsInfo { bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; }; } // namespace latinime -#endif // LATINIME_PREV_WORDS_INFO_H +#endif // LATINIME_NGRAM_CONTEXT_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 6ed65d921..4c4dfc578 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -35,23 +35,15 @@ const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE // count. const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO"; const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration -const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY = - "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP"; const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY = "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID"; -const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY = - "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS"; const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; -const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2; const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3; -// 30 days -const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS = - 30 * 24 * 60 * 60; const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000; const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index daf40d4f9..bc8eaded3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -53,15 +53,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), - mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, - DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), - mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, - DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( @@ -86,15 +80,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), - mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, - DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), - mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, - DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( @@ -113,12 +101,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount), mExtendedRegionSize(headerPolicy->mExtendedRegionSize), mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords), - mForgettingCurveOccurrencesToLevelUp( - headerPolicy->mForgettingCurveOccurrencesToLevelUp), mForgettingCurveProbabilityValuesTableId( headerPolicy->mForgettingCurveProbabilityValuesTableId), - mForgettingCurveDurationToLevelDown( - headerPolicy->mForgettingCurveDurationToLevelDown), mMaxUnigramCount(headerPolicy->mMaxUnigramCount), mMaxBigramCount(headerPolicy->mMaxBigramCount), mCodePointTable(headerPolicy->mCodePointTable) {} @@ -130,8 +114,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), - mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0), - mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0), + mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0), mCodePointTable(nullptr) {} ~HeaderPolicy() {} @@ -217,18 +200,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return &mAttributeMap; } - AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const { - return mForgettingCurveOccurrencesToLevelUp; - } - AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const { return mForgettingCurveProbabilityValuesTableId; } - AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const { - return mForgettingCurveDurationToLevelDown; - } - AK_FORCE_INLINE int getMaxUnigramCount() const { return mMaxUnigramCount; } @@ -280,9 +255,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MAX_BIGRAM_COUNT_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP; static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; - static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS; static const int DEFAULT_MAX_UNIGRAM_COUNT; static const int DEFAULT_MAX_BIGRAM_COUNT; @@ -300,9 +273,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mBigramCount; const int mExtendedRegionSize; const bool mHasHistoricalInfoOfWords; - const int mForgettingCurveOccurrencesToLevelUp; const int mForgettingCurveProbabilityValuesTableId; - const int mForgettingCurveDurationToLevelDown; const int mMaxUnigramCount; const int mMaxBigramCount; const int *const mCodePointTable; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp index 4a740d47b..ef6166ffd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp @@ -74,8 +74,8 @@ bool ProbabilityDictContent::setProbabilityEntry(const int terminalId, return false; } writingPos += getEntrySize(); - mSize++; } + mSize = terminalId + 1; } return writeEntry(probabilityEntry, entryPos); } @@ -100,7 +100,6 @@ bool ProbabilityDictContent::flushToFile(const char *const dictPath) const { bool ProbabilityDictContent::runGC( const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, const ProbabilityDictContent *const originalProbabilityDictContent) { - mSize = 0; for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); it != terminalIdMap->end(); ++it) { const ProbabilityEntry probabilityEntry = @@ -109,7 +108,6 @@ bool ProbabilityDictContent::runGC( AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second); return false; } - mSize++; } return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp index 8d169743c..6243f14cc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp @@ -310,7 +310,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN const int shortcutProbability) { if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(), targetCodePoints, targetCodePointCount, shortcutProbability)) { - AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId()); + AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId()); return false; } if (!ptNodeParams->hasShortcutTargets()) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 11f7b305f..1c61bd4b9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -33,7 +33,7 @@ #include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -146,18 +146,15 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probabi int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { - if (mHeaderPolicy->isDecayingDict()) { - // Both probabilities are encoded. Decode them and get probability. - return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); - } else { - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return ProbabilityUtils::backoff(unigramProbability); - } else { - return bigramProbability; - } + // In the v4 format, bigramProbability is a conditional probability. + const int bigramConditionalProbability = bigramProbability; + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; } + if (bigramConditionalProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } + return bigramConditionalProbability; } int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, @@ -170,37 +167,66 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } - if (!prevWordIds.empty()) { - const int bigramsPosition = getBigramsPositionOfPtNode( - getTerminalPtNodePosFromWordId(prevWordIds[0])); - BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == ptNodePos - && bigramsIt.getProbability() != NOT_A_PROBABILITY) { - return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); - } - } + if (prevWordIds.empty()) { + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); + } + if (prevWordIds[0] == NOT_A_WORD_ID) { return NOT_A_PROBABILITY; } - return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); + const PtNodeParams prevWordPtNodeParams = + mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordIds[0]); + if (prevWordPtNodeParams.isDeleted()) { + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); + } + const int bigramsPosition = mBuffers->getBigramDictContent()->getBigramListHeadPos( + prevWordPtNodeParams.getTerminalId()); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == ptNodePos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + const int bigramConditionalProbability = getBigramConditionalProbability( + prevWordPtNodeParams.getProbability(), bigramsIt.getProbability()); + return getProbability(ptNodeParams.getProbability(), bigramConditionalProbability); + } + } + return NOT_A_PROBABILITY; } void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds, NgramListener *const listener) const { - if (prevWordIds.empty()) { + if (prevWordIds.firstOrDefault(NOT_A_DICT_POS) == NOT_A_DICT_POS) { + return; + } + const PtNodeParams prevWordPtNodeParams = + mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordIds[0]); + if (prevWordPtNodeParams.isDeleted()) { return; } - const int bigramsPosition = getBigramsPositionOfPtNode( - getTerminalPtNodePosFromWordId(prevWordIds[0])); + const int bigramsPosition = mBuffers->getBigramDictContent()->getBigramListHeadPos( + prevWordPtNodeParams.getTerminalId()); BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); - listener->onVisitEntry(bigramsIt.getProbability(), + const int bigramConditionalProbability = getBigramConditionalProbability( + prevWordPtNodeParams.getProbability(), bigramsIt.getProbability()); + listener->onVisitEntry(bigramConditionalProbability, getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos())); } } +int Ver4PatriciaTriePolicy::getBigramConditionalProbability(const int prevWordUnigramProbability, + const int bigramProbability) const { + if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + // Calculate conditional probability. + return std::min(MAX_PROBABILITY - prevWordUnigramProbability + bigramProbability, + MAX_PROBABILITY); + } else { + // bigramProbability is a conditional probability. + return bigramProbability; + } +} + BinaryDictionaryShortcutIterator Ver4PatriciaTriePolicy::getShortcutIterator( const int wordId) const { const int shortcutPos = getShortcutPositionOfPtNode(getTerminalPtNodePosFromWordId(wordId)); @@ -312,7 +338,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod return mNodeWriter.suppressUnigramEntry(&ptNodeParams); } -bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); @@ -323,8 +349,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI mDictBuffer->getTailPosition()); return false; } - if (!prevWordsInfo->isValid()) { - AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); + if (!ngramContext->isValid()) { + AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary."); return false; } if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { @@ -333,23 +359,23 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI return false; } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, + const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); if (prevWordIds.empty()) { return false; } if (prevWordIds[0] == NOT_A_WORD_ID) { - if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { + if (ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) { const UnigramProperty beginningOfSentenceUnigramProperty( true /* representsBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); - if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), + if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */), &beginningOfSentenceUnigramProperty)) { AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); return false; } // Refresh word ids. - prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); + ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); } else { return false; } @@ -373,7 +399,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } } -bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); @@ -384,8 +410,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor mDictBuffer->getTailPosition()); return false; } - if (!prevWordsInfo->isValid()) { - AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); + if (!ngramContext->isValid()) { + AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary."); return false; } if (wordCodePoints.size() > MAX_WORD_LENGTH) { @@ -393,7 +419,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor wordCodePoints.size()); } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, + const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSerch */); if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) { return false; @@ -414,23 +440,27 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } -bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo, - const CodePointArrayView wordCodePoints, const bool isValidWord, - const HistoricalInfo historicalInfo) { +bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext( + const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints, + const bool isValidWord, const HistoricalInfo historicalInfo) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable " + "dictionary."); return false; } const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY; const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo); if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { - AKLOGE("Cannot update unigarm entry in updateCounter()."); + AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext()."); return false; } - const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo); - if (!addNgramEntry(prevWordsInfo, &ngramProperty)) { - AKLOGE("Cannot update unigarm entry in updateCounter()."); + const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */) + ? NOT_A_PROBABILITY : probability; + const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram, + historicalInfo); + if (!addNgramEntry(ngramContext, &ngramProperty)) { + AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext()."); return false; } return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 995d7764f..4aa399c3e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -112,13 +112,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); - bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty); - bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints); - bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints, const bool isValidWord, const HistoricalInfo historicalInfo); @@ -174,6 +174,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getTerminalPtNodePosFromWordId(const int wordId) const; const WordAttributes getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const; + int getBigramConditionalProbability(const int prevWordUnigramProbability, + const int bigramProbability) const; }; } // namespace v402 } // namespace backward diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index d3d684bfa..b7f1199c5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -23,7 +23,7 @@ #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 32a95bb6c..b17681388 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -93,25 +93,26 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } - bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } - bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); return false; } - bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints, const bool isValidWord, const HistoricalInfo historicalInfo) { // This method should not be called for non-updatable dictionary. - AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable " + "dictionary."); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp index dc0ed96d0..90d4687dd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp @@ -37,7 +37,7 @@ const PtNodeParams Ver2ParticiaTrieNodeReader::fetchPtNodeParamsInBufferFromPtNo int shortcutPos = NOT_A_DICT_POS; int bigramPos = NOT_A_DICT_POS; int siblingPos = NOT_A_DICT_POS; - PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortuctPolicy, + PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, mShortcutPolicy, mBigramPolicy, mCodePointTable, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); if (mergedNodeCodePointCount <= 0) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h index 24ec5bcca..838d37314 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h @@ -35,7 +35,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader { const DictionaryBigramsStructurePolicy *const bigramPolicy, const DictionaryShortcutsStructurePolicy *const shortcutPolicy, const int *const codePointTable) - : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortuctPolicy(shortcutPolicy), + : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), mCodePointTable(codePointTable) {} virtual const PtNodeParams fetchPtNodeParamsInBufferFromPtNodePos(const int ptNodePos) const; @@ -45,7 +45,7 @@ class Ver2ParticiaTrieNodeReader : public PtNodeReader { const ReadOnlyByteArrayView mBuffer; const DictionaryBigramsStructurePolicy *const mBigramPolicy; - const DictionaryShortcutsStructurePolicy *const mShortuctPolicy; + const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; const int *const mCodePointTable; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index f13512d5a..d28006ae9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -285,7 +285,7 @@ bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptN const int shortcutProbability) { if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(), targetCodePoints, targetCodePointCount, shortcutProbability)) { - AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId()); + AKLOGE("Cannot add new shortcut entry. terminalId: %d", ptNodeParams->getTerminalId()); return false; } return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 41b109f95..445bbe07e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -26,7 +26,7 @@ #include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -266,7 +266,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod return true; } -bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); @@ -277,8 +277,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI mDictBuffer->getTailPosition()); return false; } - if (!prevWordsInfo->isValid()) { - AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); + if (!ngramContext->isValid()) { + AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary."); return false; } if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { @@ -287,7 +287,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI return false; } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, + const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); if (prevWordIds.empty()) { return false; @@ -296,19 +296,19 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI if (prevWordIds[i] != NOT_A_WORD_ID) { continue; } - if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) { + if (!ngramContext->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) { return false; } const UnigramProperty beginningOfSentenceUnigramProperty( true /* representsBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); - if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), + if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */), &beginningOfSentenceUnigramProperty)) { AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); return false; } // Refresh word ids. - prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); + ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); } const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()), false /* forceLowerCaseSearch */); @@ -326,7 +326,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } } -bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, +bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); @@ -337,8 +337,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor mDictBuffer->getTailPosition()); return false; } - if (!prevWordsInfo->isValid()) { - AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); + if (!ngramContext->isValid()) { + AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary."); return false; } if (wordCodePoints.size() > MAX_WORD_LENGTH) { @@ -346,7 +346,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor wordCodePoints.size()); } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; - const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, + const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSerch */); if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) { return false; @@ -363,26 +363,30 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } -bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo, - const CodePointArrayView wordCodePoints, const bool isValidWord, - const HistoricalInfo historicalInfo) { +bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext( + const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints, + const bool isValidWord, const HistoricalInfo historicalInfo) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable " + "dictionary."); return false; } // TODO: Have count up method in language model dict content. const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY; const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, - false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo); + false /* isNotAWord */, false /* isBlacklisted */, probability, historicalInfo); if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { - AKLOGE("Cannot update unigarm entry in updateCounter()."); + AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext()."); return false; } - const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo); - for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) { - const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i)); - if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) { - AKLOGE("Cannot update ngram entry in updateCounter()."); + const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */) + ? NOT_A_PROBABILITY : probability; + const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram, + historicalInfo); + for (size_t i = 1; i <= ngramContext->getPrevWordCount(); ++i) { + const NgramContext trimmedNgramContext(ngramContext->getTrimmedNgramContext(i)); + if (!addNgramEntry(&trimmedNgramContext, &ngramProperty)) { + AKLOGE("Cannot update ngram entry in updateEntriesForWordWithNgramContext()."); return false; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 662bb8d4b..60e30f209 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -92,13 +92,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); - bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool addNgramEntry(const NgramContext *const ngramContext, const NgramProperty *const ngramProperty); - bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints); - bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints, const bool isValidWord, const HistoricalInfo historicalInfo); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index af4bc186a..e5ef2abf8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -29,10 +29,14 @@ namespace latinime { const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; -const int ForgettingCurveUtils::MAX_LEVEL = 3; -const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 1; -const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15; -const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14; +const int ForgettingCurveUtils::MAX_LEVEL = 15; +const int ForgettingCurveUtils::MIN_VISIBLE_LEVEL = 2; +const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 31; +const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 30; +const int ForgettingCurveUtils::OCCURRENCES_TO_RAISE_THE_LEVEL = 1; +// TODO: Evaluate whether this should be 7.5 days. +// 15 days +const int ForgettingCurveUtils::DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS = 15 * 24 * 60 * 60; const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2; const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2; @@ -54,19 +58,23 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT || (originalHistoricalInfo->getLevel() == newHistoricalInfo->getLevel() && originalHistoricalInfo->getCount() < newHistoricalInfo->getCount())) { // Initial information. + int count = newHistoricalInfo->getCount(); + if (count >= OCCURRENCES_TO_RAISE_THE_LEVEL) { + const int level = clampToValidLevelRange(newHistoricalInfo->getLevel() + 1); + return HistoricalInfo(timestamp, level, 0 /* count */); + } const int level = clampToValidLevelRange(newHistoricalInfo->getLevel()); - const int count = clampToValidCountRange(newHistoricalInfo->getCount(), headerPolicy); - return HistoricalInfo(timestamp, level, count); + return HistoricalInfo(timestamp, level, clampToValidCountRange(count, headerPolicy)); } else { const int updatedCount = originalHistoricalInfo->getCount() + 1; - if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) { + if (updatedCount >= OCCURRENCES_TO_RAISE_THE_LEVEL) { // The count exceeds the max value the level can be incremented. if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) { // The level is already max. return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), originalHistoricalInfo->getCount()); } else { - // Level up. + // Raise the level. return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1, 0 /* count */); } @@ -79,31 +87,18 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ int ForgettingCurveUtils::decodeProbability( const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) { const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(), - headerPolicy->getForgettingCurveDurationToLevelDown()); + DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS); return sProbabilityTable.getProbability( headerPolicy->getForgettingCurveProbabilityValuesTableId(), clampToValidLevelRange(historicalInfo->getLevel()), clampToValidTimeStepCountRange(elapsedTimeStepCount)); } -/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability, - const int bigramProbability) { - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return std::min(backoff(unigramProbability), MAX_PROBABILITY); - } else { - // TODO: Investigate better way to handle bigram probability. - return std::min(std::max(unigramProbability, - bigramProbability + MULTIPLIER_TWO_IN_PROBABILITY_SCALE), MAX_PROBABILITY); - } -} - /* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) { return historicalInfo->getLevel() > 0 || getElapsedTimeStepCount(historicalInfo->getTimestamp(), - headerPolicy->getForgettingCurveDurationToLevelDown()) + DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS) < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; } @@ -113,14 +108,14 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) { return HistoricalInfo(); } - const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown(); + const int durationToLevelDownInSeconds = DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS; const int elapsedTimeStep = getElapsedTimeStepCount( originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds); if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) { // No need to update historical info. return *originalHistoricalInfo; } - // Level down. + // Lower the level. const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1); const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ? originalHistoricalInfo->getLevel() : maxLevelDownAmonut; @@ -170,7 +165,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ int ForgettingCurveUtils::clampToValidCountRange(const int count, const HeaderPolicy *const headerPolicy) { - return std::min(std::max(count, 0), headerPolicy->getForgettingCurveOccurrencesToLevelUp() - 1); + return std::min(std::max(count, 0), OCCURRENCES_TO_RAISE_THE_LEVEL - 1); } /* static */ int ForgettingCurveUtils::clampToValidLevelRange(const int level) { @@ -187,9 +182,9 @@ const int ForgettingCurveUtils::ProbabilityTable::MODEST_PROBABILITY_TABLE_ID = const int ForgettingCurveUtils::ProbabilityTable::STRONG_PROBABILITY_TABLE_ID = 2; const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_PROBABILITY_TABLE_ID = 3; const int ForgettingCurveUtils::ProbabilityTable::WEAK_MAX_PROBABILITY = 127; -const int ForgettingCurveUtils::ProbabilityTable::MODEST_BASE_PROBABILITY = 32; -const int ForgettingCurveUtils::ProbabilityTable::STRONG_BASE_PROBABILITY = 35; -const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_BASE_PROBABILITY = 40; +const int ForgettingCurveUtils::ProbabilityTable::MODEST_BASE_PROBABILITY = 8; +const int ForgettingCurveUtils::ProbabilityTable::STRONG_BASE_PROBABILITY = 9; +const int ForgettingCurveUtils::ProbabilityTable::AGGRESSIVE_BASE_PROBABILITY = 10; ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() { @@ -202,7 +197,7 @@ ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() { const float endProbability = getBaseProbabilityForLevel(tableId, level - 1); for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) { - if (level == 0) { + if (level < MIN_VISIBLE_LEVEL) { mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY; continue; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 10abb405a..ccbc4a98d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -39,9 +39,6 @@ class ForgettingCurveUtils { static int decodeProbability(const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy); - static int getProbability(const int encodedUnigramProbability, - const int encodedBigramProbability); - static bool needsToKeep(const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy); @@ -101,6 +98,8 @@ class ForgettingCurveUtils { static const int MIN_VISIBLE_LEVEL; static const int MAX_ELAPSED_TIME_STEP_COUNT; static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; + static const int OCCURRENCES_TO_RAISE_THE_LEVEL; + static const int DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS; static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT; static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT; diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h index 235a03bba..25cc41742 100644 --- a/native/jni/src/utils/jni_data_utils.h +++ b/native/jni/src/utils/jni_data_utils.h @@ -21,7 +21,7 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/session/prev_words_info.h" +#include "suggest/core/session/ngram_context.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "utils/char_utils.h" @@ -96,7 +96,7 @@ class JniDataUtils { } } - static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays, + static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) { int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; @@ -119,7 +119,7 @@ class JniDataUtils { &isBeginningOfSentenceBoolean); isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; } - return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, + return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, prevWordCount); } |