diff options
Diffstat (limited to 'native/jni/src')
36 files changed, 245 insertions, 175 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index a3bb408c3..8d3f8a9f8 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -144,9 +144,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) { } bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); + return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty); } bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, @@ -155,6 +155,14 @@ bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints); } +bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView codePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints, + isValidWord, historicalInfo); +} + bool Dictionary::flush(const char *const filePath) { TimeKeeper::setCurrentTime(); return mDictionaryStructureWithBufferPolicy->flush(filePath); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 324e3504a..a58dbfbd7 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -22,6 +22,7 @@ #include "defines.h" #include "jni.h" #include "suggest/core/dictionary/ngram_listener.h" +#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -85,11 +86,15 @@ class Dictionary { bool removeUnigramEntry(const CodePointArrayView codePoints); bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty); + const NgramProperty *const ngramProperty); bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView codePoints); + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView codePoints, const bool isValidWord, + const HistoricalInfo historicalInfo); + bool flush(const char *const filePath); bool flushWithGC(const char *const filePath); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h b/native/jni/src/suggest/core/dictionary/property/historical_info.h index 428ca8626..5ed9ebfca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/historical_info.h +++ b/native/jni/src/suggest/core/dictionary/property/historical_info.h @@ -34,7 +34,7 @@ class HistoricalInfo { return mTimestamp != NOT_A_TIMESTAMP; } - int getTimeStamp() const { + int getTimestamp() const { return mTimestamp; } @@ -47,12 +47,12 @@ class HistoricalInfo { } private: - // Copy constructor is public to use this class as a type of return value. - DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo); + // Default copy constructor and assign operator are used for using in std::vector. - const int mTimestamp; - const int mLevel; - const int mCount; + // TODO: Make members const. + int mTimestamp; + int mLevel; + int mCount; }; } // namespace latinime #endif /* LATINIME_HISTORICAL_INFO_H */ diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h index 9e0baa032..dce460099 100644 --- a/native/jni/src/suggest/core/dictionary/property/bigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h @@ -14,22 +14,22 @@ * limitations under the License. */ -#ifndef LATINIME_BIGRAM_PROPERTY_H -#define LATINIME_BIGRAM_PROPERTY_H +#ifndef LATINIME_NGRAM_PROPERTY_H +#define LATINIME_NGRAM_PROPERTY_H #include <vector> #include "defines.h" +#include "suggest/core/dictionary/property/historical_info.h" namespace latinime { -// TODO: Change to NgramProperty. -class BigramProperty { +class NgramProperty { public: - BigramProperty(const std::vector<int> &&targetCodePoints, const int probability, - const int timestamp, const int level, const int count) + NgramProperty(const std::vector<int> &&targetCodePoints, const int probability, + const HistoricalInfo historicalInfo) : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability), - mTimestamp(timestamp), mLevel(level), mCount(count) {} + mHistoricalInfo(historicalInfo) {} const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; @@ -39,28 +39,18 @@ class BigramProperty { return mProbability; } - int getTimestamp() const { - return mTimestamp; - } - - int getLevel() const { - return mLevel; - } - - int getCount() const { - return mCount; + const HistoricalInfo getHistoricalInfo() const { + return mHistoricalInfo; } private: // Default copy constructor and assign operator are used for using in std::vector. - DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty); + DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty); // TODO: Make members const. std::vector<int> mTargetCodePoints; int mProbability; - int mTimestamp; - int mLevel; - int mCount; + HistoricalInfo mHistoricalInfo; }; } // namespace latinime -#endif // LATINIME_WORD_PROPERTY_H +#endif // LATINIME_NGRAM_PROPERTY_H diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h index b7e7d6686..d1f0ab4ca 100644 --- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h @@ -20,6 +20,7 @@ #include <vector> #include "defines.h" +#include "suggest/core/dictionary/property/historical_info.h" namespace latinime { @@ -50,15 +51,21 @@ class UnigramProperty { UnigramProperty() : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false), - mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), - mShortcuts() {} + mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {} UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, - const bool isBlacklisted, const int probability, const int timestamp, const int level, - const int count, const std::vector<ShortcutProperty> *const shortcuts) + const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo, + const std::vector<ShortcutProperty> &&shortcuts) : mRepresentsBeginningOfSentence(representsBeginningOfSentence), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), - mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {} + mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {} + + // Without shortcuts. + UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, + const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo) + : mRepresentsBeginningOfSentence(representsBeginningOfSentence), + mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), + mHistoricalInfo(historicalInfo), mShortcuts() {} bool representsBeginningOfSentence() const { return mRepresentsBeginningOfSentence; @@ -85,16 +92,8 @@ class UnigramProperty { return mProbability; } - int getTimestamp() const { - return mTimestamp; - } - - int getLevel() const { - return mLevel; - } - - int getCount() const { - return mCount; + const HistoricalInfo getHistoricalInfo() const { + return mHistoricalInfo; } const std::vector<ShortcutProperty> &getShortcuts() const { @@ -110,10 +109,7 @@ class UnigramProperty { bool mIsNotAWord; bool mIsBlacklisted; int mProbability; - // Historical information - int mTimestamp; - int mLevel; - int mCount; + HistoricalInfo mHistoricalInfo; std::vector<ShortcutProperty> mShortcuts; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp index 66daf3e3f..caac8fe79 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp @@ -17,6 +17,7 @@ #include "suggest/core/dictionary/property/word_property.h" #include "utils/jni_data_utils.h" +#include "suggest/core/dictionary/property/historical_info.h" namespace latinime { @@ -28,11 +29,12 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), false /* needsNullTermination */); jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), - !mBigrams.empty(), mUnigramProperty.hasShortcuts(), + !mNgrams.empty(), mUnigramProperty.hasShortcuts(), mUnigramProperty.representsBeginningOfSentence()}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); - int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(), - mUnigramProperty.getLevel(), mUnigramProperty.getCount()}; + const HistoricalInfo &historicalInfo = mUnigramProperty.getHistoricalInfo(); + int probabilityInfo[] = {mUnigramProperty.getProbability(), historicalInfo.getTimestamp(), + historicalInfo.getLevel(), historicalInfo.getCount()}; env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo), probabilityInfo); @@ -42,18 +44,19 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); // Output bigrams. - for (const auto &bigramProperty : mBigrams) { - const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints(); + // TODO: Support n-gram + for (const auto &ngramProperty : mNgrams) { + const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), false /* needsNullTermination */); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); - - int bigramProbabilityInfo[] = {bigramProperty.getProbability(), - bigramProperty.getTimestamp(), bigramProperty.getLevel(), - bigramProperty.getCount()}; + const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo(); + int bigramProbabilityInfo[] = {ngramProperty.getProbability(), + ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(), + ngramHistoricalInfo.getCount()}; jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h index 4e6febb3f..0c23e8225 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.h +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -21,7 +21,7 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" namespace latinime { @@ -31,12 +31,12 @@ class WordProperty { public: // Default constructor is used to create an instance that indicates an invalid word. WordProperty() - : mCodePoints(), mUnigramProperty(), mBigrams() {} + : mCodePoints(), mUnigramProperty(), mNgrams() {} WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, - const std::vector<BigramProperty> *const bigrams) + const std::vector<NgramProperty> *const bigrams) : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), - mBigrams(*bigrams) {} + mNgrams(*bigrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, @@ -46,8 +46,8 @@ class WordProperty { return &mUnigramProperty; } - const std::vector<BigramProperty> *getBigramProperties() const { - return &mBigrams; + const std::vector<NgramProperty> *getNgramProperties() const { + return &mNgrams; } private: @@ -56,7 +56,7 @@ class WordProperty { const std::vector<int> mCodePoints; const UnigramProperty mUnigramProperty; - const std::vector<BigramProperty> mBigrams; + const std::vector<NgramProperty> mNgrams; }; } // namespace latinime #endif // LATINIME_WORD_PROPERTY_H diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 1546b2610..6624b7921 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" +#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/word_attributes.h" #include "utils/int_array_view.h" @@ -81,12 +82,17 @@ class DictionaryStructureWithBufferPolicy { // Returns whether the update was success or not. virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) = 0; + const NgramProperty *const ngramProperty) = 0; // Returns whether the update was success or not. virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints) = 0; + // Returns whether the update was success or not. + virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) = 0; + // Returns whether the flush was success or not. virtual bool flush(const char *const filePath) = 0; diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 02e82a8e0..553d5ad07 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -33,7 +33,7 @@ class PrevWordsInfo { clear(); } - PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) + PrevWordsInfo(const PrevWordsInfo &prevWordsInfo) : mPrevWordCount(prevWordsInfo.mPrevWordCount) { for (size_t i = 0; i < mPrevWordCount; ++i) { mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i]; @@ -73,6 +73,16 @@ class PrevWordsInfo { mIsBeginningOfSentence[0] = isBeginningOfSentence; } + size_t getPrevWordCount() const { + return mPrevWordCount; + } + + // TODO: Remove. + const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const { + return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence, + std::min(mPrevWordCount, maxPrevWordCount)); + } + bool isValid() const { if (mPrevWordCodePointCount[0] > 0) { return true; @@ -112,7 +122,7 @@ class PrevWordsInfo { } private: - DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); + DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo); static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp index 3e8e059f2..bc0f47f79 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp @@ -24,7 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.h" @@ -60,7 +60,7 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out } bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { // 1. The word has no bigrams yet. // 2. The word has bigrams, and there is the target in the list. // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. @@ -79,7 +79,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, - bigramProperty); + ngramProperty); // Write an entry. const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { @@ -112,7 +112,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &newBigramEntry, bigramProperty); + &newBigramEntry, ngramProperty); if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { return false; } @@ -138,7 +138,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry updatedBigramEntry = originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &updatedBigramEntry, bigramProperty); + &updatedBigramEntry, ngramProperty); return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); } @@ -264,18 +264,17 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( const BigramEntry *const originalBigramEntry, - const BigramProperty *const bigramProperty) const { + const NgramProperty *const ngramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { - const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(), - bigramProperty->getLevel(), bigramProperty->getCount()); + const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo(); const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( - originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(), + originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy); return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); } else { - return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability()); + return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h index 50a4c9743..aac6f5470 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h @@ -36,7 +36,7 @@ namespace v402 { class BigramDictContent; } // namespace v402 } // namespace backward -class BigramProperty; +class NgramProperty; namespace backward { namespace v402 { } // namespace v402 @@ -64,7 +64,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { } bool addNewEntry(const int terminalId, const int newTargetTerminalId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); bool removeEntry(const int terminalId, const int targetTerminalId); @@ -80,7 +80,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { int *const outTailEntryPos) const; const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, - const BigramProperty *const bigramProperty) const; + const NgramProperty *const ngramProperty) const; bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp index e2dd93c5e..9e1adff70 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_dict_content.cpp @@ -83,10 +83,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition( } if (mHasHistoricalInfo) { const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo(); - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), + if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimestamp(), Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos, - historicalInfo->getTimeStamp()); + historicalInfo->getTimestamp()); return false; } if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h index 40968b4d8..480095a2f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/bigram_entry.h @@ -25,8 +25,8 @@ #define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H #include "defines.h" +#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" namespace latinime { namespace backward { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp index c671647d4..4a740d47b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.cpp @@ -147,7 +147,7 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit } if (mHasHistoricalInfo) { const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo(); - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), + if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimestamp(), Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) { AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h index 8ccfa33dc..4111a49c0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h @@ -25,8 +25,8 @@ #define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H #include "defines.h" +#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" namespace latinime { namespace backward { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp index 97a8bcc98..8d169743c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp @@ -232,8 +232,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( } bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { - if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) { + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { + if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, ngramProperty, outAddedNewEntry)) { AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d", prevWordIds[0], wordId); return false; @@ -396,8 +396,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const UnigramProperty *const unigramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { - const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(), - unigramProperty->getLevel(), unigramProperty->getCount()); + const HistoricalInfo &historicalInfoForUpdate = unigramProperty->getHistoricalInfo(); const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( originalProbabilityEntry->getHistoricalInfo(), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h index 9d8a55bff..d0bab50f8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h @@ -94,7 +94,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index f752f89f1..11f7b305f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -30,7 +30,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/session/prev_words_info.h" @@ -52,6 +52,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; +const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -312,7 +313,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod } bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -326,9 +327,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("The word is too long to insert the ngram to the dictionary. " - "length: %zd", bigramProperty->getTargetCodePoints()->size()); + "length: %zd", ngramProperty->getTargetCodePoints()->size()); return false; } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; @@ -339,11 +340,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } if (prevWordIds[0] == NOT_A_WORD_ID) { if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { - const std::vector<UnigramProperty::ShortcutProperty> shortcuts; const UnigramProperty beginningOfSentenceUnigramProperty( true /* representsBeginningOfSentence */, true /* isNotAWord */, - false /* isBlacklisted */, MAX_PROBABILITY /* probability */, - NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), &beginningOfSentenceUnigramProperty)) { AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); @@ -356,7 +355,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } } const int wordPos = getTerminalPtNodePosFromWordId(getWordId( - CodePointArrayView(*bigramProperty->getTargetCodePoints()), + CodePointArrayView(*ngramProperty->getTargetCodePoints()), false /* forceLowerCaseSearch */)); if (wordPos == NOT_A_DICT_POS) { return false; @@ -364,7 +363,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI bool addedNewBigram = false; const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos), - wordPos, bigramProperty, &addedNewBigram)) { + wordPos, ngramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -414,6 +413,29 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } + +bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + return false; + } + const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY; + const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, + false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo); + if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { + AKLOGE("Cannot update unigarm entry in updateCounter()."); + return false; + } + const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo); + if (!addNgramEntry(prevWordsInfo, &ngramProperty)) { + AKLOGE("Cannot update unigarm entry in updateCounter()."); + return false; + } + return true; +} + bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); @@ -499,7 +521,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( ptNodeParams.getTerminalId()); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. - std::vector<BigramProperty> bigrams; + std::vector<NgramProperty> ngrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); if (bigramListPos != NOT_A_DICT_POS) { int bigramWord1CodePoints[MAX_WORD_LENGTH]; @@ -526,10 +548,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( ForgettingCurveUtils::decodeProbability( bigramEntry.getHistoricalInfo(), mHeaderPolicy) : bigramEntry.getProbability(); - bigrams.emplace_back( + ngrams.emplace_back( CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), - probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), - historicalInfo->getCount()); + probability, *historicalInfo); } } // Fetch shortcut information. @@ -552,9 +573,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( } const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), - historicalInfo->getTimeStamp(), historicalInfo->getLevel(), - historicalInfo->getCount(), &shortcuts); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); + *historicalInfo, std::move(shortcuts)); + return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 8420c94d0..995d7764f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -59,6 +59,7 @@ namespace backward { namespace v402 { // Word id = Position of a PtNode that represents the word. +// Max supported n-gram is bigram. class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) @@ -112,11 +113,15 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty); + const NgramProperty *const ngramProperty); bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints); + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo); + bool flush(const char *const filePath); bool flushWithGC(const char *const filePath); @@ -146,6 +151,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // prevent the dictionary from overflowing. static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy *const mHeaderPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp index 3fb4caa08..2887dc6b1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp @@ -216,7 +216,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : probabilityEntry.getProbability(); priorityQueue.push(DictProbability(terminalPos, probability, - probabilityEntry.getHistoricalInfo()->getTimeStamp())); + probabilityEntry.getHistoricalInfo()->getTimestamp())); } // Delete unigrams. @@ -263,7 +263,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) { bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : bigramEntry.getProbability(); priorityQueue.push(DictProbability(entryPos, probability, - bigramEntry.getHistoricalInfo()->getTimeStamp())); + bigramEntry.getHistoricalInfo()->getTimestamp())); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h index 2aa402748..b8a4a92e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h @@ -76,6 +76,7 @@ class DynamicPtGcEventListeners { int mValidUnigramCount; }; + // TODO: Remove when we stop supporting v402 format. // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram // entries. class TraversePolicyToUpdateBigramProbability diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp index 3b58d7d6d..92fd6f214 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp @@ -82,7 +82,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readi } bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, - const int wordPos, const BigramProperty *const bigramProperty, + const int wordPos, const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { if (prevWordsPtNodePos.empty()) { return false; @@ -96,7 +96,7 @@ bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPt const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); const int wordId = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); - return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry); + return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, ngramProperty, outAddedNewEntry); } bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h index 710047e8c..2bbe2f4dc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h @@ -23,7 +23,7 @@ namespace latinime { -class BigramProperty; +class NgramProperty; class BufferWithExtendableBuffer; class DynamicPtReadingHelper; class PtNodeReader; @@ -46,7 +46,7 @@ class DynamicPtUpdatingHelper { // TODO: Remove after stopping supporting v402. // Add an n-gram entry. bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); // TODO: Remove after stopping supporting v402. // Remove an n-gram entry. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h index 955d779ac..954db9b0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -25,7 +25,7 @@ namespace latinime { -class BigramProperty; +class NgramProperty; class UnigramProperty; // Interface class used to write PtNode information. @@ -72,7 +72,7 @@ class PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0; + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) = 0; virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 13cf9a5a8..d3d684bfa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -436,7 +436,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( const PtNodeParams ptNodeParams = mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); // Fetch bigram information. - std::vector<BigramProperty> bigrams; + std::vector<NgramProperty> ngrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); @@ -450,9 +450,9 @@ const WordProperty PatriciaTriePolicy::getWordProperty( getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH, bigramWord1CodePoints, &word1Probability); const int probability = getProbability(word1Probability, bigramsIt.getProbability()); - bigrams.emplace_back( + ngrams.emplace_back( CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(), - probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */); + probability, HistoricalInfo()); } } // Fetch shortcut information. @@ -477,8 +477,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty( } const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), - NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); + HistoricalInfo(), std::move(shortcuts)); + return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 0d679c5dc..32a95bb6c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -38,6 +38,7 @@ class DicNode; class DicNodeVector; // Word id = Position of a PtNode that represents the word. +// Max supported n-gram is bigram. class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) @@ -93,7 +94,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -106,6 +107,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + // This method should not be called for non-updatable dictionary. + AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + return false; + } + bool flush(const char *const filePath) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: flush() is called for non-updatable dictionary."); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index 139230228..956dabb4f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -296,7 +296,7 @@ bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPoli ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(), headerPolicy) : probabilityEntry.getProbability(); outEntryInfo->emplace_back(probability, - probabilityEntry.getHistoricalInfo()->getTimeStamp(), + probabilityEntry.getHistoricalInfo()->getTimestamp(), entry.key(), targetLevel, prevWordIds->data()); } return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h index e1e10ca17..fa1415633 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h @@ -21,10 +21,10 @@ #include <cstdint> #include "defines.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/historical_info.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" namespace latinime { @@ -53,15 +53,13 @@ class ProbabilityEntry { unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isPossiblyOffensive())), mProbability(unigramProperty->getProbability()), - mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(), - unigramProperty->getCount()) {} + mHistoricalInfo(unigramProperty->getHistoricalInfo()) {} - // Create from bigram property. + // Create from ngram property. // TODO: Set flags. - ProbabilityEntry(const BigramProperty *const bigramProperty) - : mFlags(0), mProbability(bigramProperty->getProbability()), - mHistoricalInfo(bigramProperty->getTimestamp(), bigramProperty->getLevel(), - bigramProperty->getCount()) {} + ProbabilityEntry(const NgramProperty *const ngramProperty) + : mFlags(0), mProbability(ngramProperty->getProbability()), + mHistoricalInfo(ngramProperty->getHistoricalInfo()) {} bool isValid() const { return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0; @@ -103,7 +101,7 @@ class ProbabilityEntry { uint64_t encodedEntry = static_cast<uint64_t>(mFlags); if (hasHistoricalInfo) { encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT)) - ^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp()); + ^ static_cast<uint64_t>(mHistoricalInfo.getTimestamp()); encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT)) ^ static_cast<uint64_t>(mHistoricalInfo.getLevel()); encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index a1a33d27a..f13512d5a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -61,6 +61,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted( } } +// TODO: Quit using bigramLinkedNodePos. bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, const int bigramLinkedNodePos) { @@ -208,15 +209,16 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( terminalId, &probabilityEntryToWrite); } +// TODO: Support counting ngram entries. bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { + const NgramProperty *const ngramProperty, bool *const outAddedNewBigram) { LanguageModelDictContent *const languageModelDictContent = mBuffers->getMutableLanguageModelDictContent(); const ProbabilityEntry probabilityEntry = languageModelDictContent->getNgramProbabilityEntry(prevWordIds, wordId); - const ProbabilityEntry probabilityEntryOfBigramProperty(bigramProperty); + const ProbabilityEntry probabilityEntryOfNgramProperty(ngramProperty); const ProbabilityEntry updatedProbabilityEntry = createUpdatedEntryFrom( - &probabilityEntry, &probabilityEntryOfBigramProperty); + &probabilityEntry, &probabilityEntryOfNgramProperty); if (!languageModelDictContent->setNgramProbabilityEntry( prevWordIds, wordId, &updatedProbabilityEntry)) { AKLOGE("Cannot add new ngram entry. prevWordId[0]: %d, prevWordId.size(): %zd, wordId: %d", diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index 17915273b..ea4f09904 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -74,7 +74,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, - const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + const NgramProperty *const ngramProperty, bool *const outAddedNewEntry); virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 0f0696410..41b109f95 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -23,7 +23,7 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/ngram_listener.h" -#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/ngram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/session/prev_words_info.h" @@ -43,6 +43,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; +const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -266,7 +267,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod } bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty) { + const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -280,9 +281,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { AKLOGE("The word is too long to insert the ngram to the dictionary. " - "length: %zd", bigramProperty->getTargetCodePoints()->size()); + "length: %zd", ngramProperty->getTargetCodePoints()->size()); return false; } WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; @@ -298,11 +299,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) { return false; } - const std::vector<UnigramProperty::ShortcutProperty> shortcuts; const UnigramProperty beginningOfSentenceUnigramProperty( true /* representsBeginningOfSentence */, true /* isNotAWord */, - false /* isBlacklisted */, MAX_PROBABILITY /* probability */, - NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), &beginningOfSentenceUnigramProperty)) { AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); @@ -311,13 +310,13 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI // Refresh word ids. prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */); } - const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()), + const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()), false /* forceLowerCaseSearch */); if (wordId == NOT_A_WORD_ID) { return false; } bool addedNewEntry = false; - if (mNodeWriter.addNgramEntry(prevWordIds, wordId, bigramProperty, &addedNewEntry)) { + if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) { if (addedNewEntry) { mBigramCount++; } @@ -364,6 +363,32 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } +bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + return false; + } + // TODO: Have count up method in language model dict content. + const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY; + const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, + false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo); + if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { + AKLOGE("Cannot update unigarm entry in updateCounter()."); + return false; + } + const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo); + for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) { + const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i)); + if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) { + AKLOGE("Cannot update ngram entry in updateCounter()."); + return false; + } + } + return true; +} + bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); @@ -451,7 +476,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. // TODO: Support n-gram. - std::vector<BigramProperty> bigrams; + std::vector<NgramProperty> ngrams; const WordIdArrayView prevWordIds = WordIdArrayView::singleElementView(&wordId); int bigramWord1CodePoints[MAX_WORD_LENGTH]; for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries( @@ -463,9 +488,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( const int probability = probabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) : probabilityEntry.getProbability(); - bigrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), - probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(), - historicalInfo->getCount()); + ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), + probability, *historicalInfo); } // Fetch shortcut information. std::vector<UnigramProperty::ShortcutProperty> shortcuts; @@ -487,9 +511,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( } const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(), probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(), - probabilityEntry.getProbability(), historicalInfo->getTimeStamp(), - historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts); - return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams); + probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts)); + return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index c9bde2cf5..662bb8d4b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -37,6 +37,7 @@ namespace latinime { class DicNode; class DicNodeVector; +// TODO: Support counting ngram entries. // Word id = Artificial id that is stored in the PtNode looked up by the word. class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: @@ -92,11 +93,15 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, - const BigramProperty *const bigramProperty); + const NgramProperty *const ngramProperty); bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints); + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo); + bool flush(const char *const filePath); bool flushWithGC(const char *const filePath); @@ -126,6 +131,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // prevent the dictionary from overflowing. static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + // TODO: Remove + static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy *const mHeaderPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 442abadee..e1ff973de 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -114,14 +114,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } - readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability - traversePolicyToUpdateBigramProbability(&ptNodeWriter); - if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &traversePolicyToUpdateBigramProbability)) { - return false; - } - // Mapping from positions in mBuffer to positions in bufferToWrite. PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h index b6278c4cb..3569d0576 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h @@ -33,6 +33,7 @@ class Ver4PatriciaTrieWritingHelper { Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) : mBuffers(buffers) {} + // TODO: Support counting ngram entries. bool writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const; @@ -70,11 +71,6 @@ class Ver4PatriciaTrieWritingHelper { Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, int *const outBigramCount); - bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader, - Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount); - - bool truncateBigrams(const int maxBigramCount); - Ver4DictBuffers *const mBuffers; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index fed0ae77e..af4bc186a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -43,7 +43,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo( const HistoricalInfo *const originalHistoricalInfo, const int newProbability, const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) { - const int timestamp = newHistoricalInfo->getTimeStamp(); + const int timestamp = newHistoricalInfo->getTimestamp(); if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { // Add entry as a valid word. const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel()); @@ -78,7 +78,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ int ForgettingCurveUtils::decodeProbability( const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) { - const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp(), + const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(), headerPolicy->getForgettingCurveDurationToLevelDown()); return sProbabilityTable.getProbability( headerPolicy->getForgettingCurveProbabilityValuesTableId(), @@ -102,7 +102,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) { return historicalInfo->getLevel() > 0 - || getElapsedTimeStepCount(historicalInfo->getTimeStamp(), + || getElapsedTimeStepCount(historicalInfo->getTimestamp(), headerPolicy->getForgettingCurveDurationToLevelDown()) < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; } @@ -110,12 +110,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave( const HistoricalInfo *const originalHistoricalInfo, const HeaderPolicy *const headerPolicy) { - if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) { + if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) { return HistoricalInfo(); } const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown(); const int elapsedTimeStep = getElapsedTimeStepCount( - originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds); + originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds); if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) { // No need to update historical info. return *originalHistoricalInfo; @@ -124,7 +124,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1); const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ? originalHistoricalInfo->getLevel() : maxLevelDownAmonut; - const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() + + const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimestamp() + levelDownAmount * durationToLevelDownInSeconds; return HistoricalInfo(adjustedTimestampInSeconds, originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 313eb6b64..10abb405a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -20,7 +20,7 @@ #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/utils/historical_info.h" +#include "suggest/core/dictionary/property/historical_info.h" namespace latinime { |