diff options
author | 2014-10-15 21:44:43 +0000 | |
---|---|---|
committer | 2014-10-15 21:44:45 +0000 | |
commit | f4928ad4dd3108d427ef421c3ef56b259c7c2063 (patch) | |
tree | 20ef7ffdad0d4d5024242fb6d756a01ed8f4c43b /native/jni/src | |
parent | 8fff6ae68b3c2e31687370fc867d3b6098938be7 (diff) | |
parent | 3601c214f80cf62eecacd84b2fb27fe9c6b14a19 (diff) | |
download | latinime-f4928ad4dd3108d427ef421c3ef56b259c7c2063.tar.gz latinime-f4928ad4dd3108d427ef421c3ef56b259c7c2063.tar.xz latinime-f4928ad4dd3108d427ef421c3ef56b259c7c2063.zip |
Merge "Update useless n-gram entry detection logic during GC."
Diffstat (limited to 'native/jni/src')
2 files changed, 32 insertions, 21 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index a7296a302..c4297f5d6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -270,16 +270,26 @@ int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWord } bool LanguageModelDictContent::updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, - const int level, const HeaderPolicy *const headerPolicy, int *const outEntryCounts) { + const int prevWordCount, const HeaderPolicy *const headerPolicy, + int *const outEntryCounts) { for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) { - if (level > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { - AKLOGE("Invalid level. level: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.", - level, MAX_PREV_WORD_COUNT_FOR_N_GRAM); + if (prevWordCount > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + AKLOGE("Invalid prevWordCount. prevWordCount: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.", + prevWordCount, MAX_PREV_WORD_COUNT_FOR_N_GRAM); return false; } const ProbabilityEntry probabilityEntry = ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo); - if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence()) { + if (prevWordCount > 0 && probabilityEntry.isValid() + && !mTrieMap.getRoot(entry.key()).mIsValid) { + // The entry is related to a word that has been removed. Remove the entry. + if (!mTrieMap.remove(entry.key(), bitmapEntryIndex)) { + return false; + } + continue; + } + if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence() + && probabilityEntry.isValid()) { const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( probabilityEntry.getHistoricalInfo(), headerPolicy); if (ForgettingCurveUtils::needsToKeep(&historicalInfo, headerPolicy)) { @@ -298,13 +308,13 @@ bool LanguageModelDictContent::updateAllProbabilityEntriesForGCInner(const int b } } if (!probabilityEntry.representsBeginningOfSentence()) { - outEntryCounts[level] += 1; + outEntryCounts[prevWordCount] += 1; } if (!entry.hasNextLevelMap()) { continue; } - if (!updateAllProbabilityEntriesForGCInner(entry.getNextLevelBitmapEntryIndex(), level + 1, - headerPolicy, outEntryCounts)) { + if (!updateAllProbabilityEntriesForGCInner(entry.getNextLevelBitmapEntryIndex(), + prevWordCount + 1, headerPolicy, outEntryCounts)) { return false; } } @@ -332,7 +342,7 @@ bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel( for (int i = 0; i < entryCountToRemove; ++i) { const EntryInfoToTurncate &entryInfo = entryInfoVector[i]; if (!removeNgramProbabilityEntry( - WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mEntryLevel), entryInfo.mKey)) { + WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mPrevWordCount), entryInfo.mKey)) { return false; } } @@ -342,9 +352,9 @@ bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel( bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel, const int bitmapEntryIndex, std::vector<int> *const prevWordIds, std::vector<EntryInfoToTurncate> *const outEntryInfo) const { - const int currentLevel = prevWordIds->size(); + const int prevWordCount = prevWordIds->size(); for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) { - if (currentLevel < targetLevel) { + if (prevWordCount < targetLevel) { if (!entry.hasNextLevelMap()) { continue; } @@ -379,10 +389,10 @@ bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()( if (left.mKey != right.mKey) { return left.mKey < right.mKey; } - if (left.mEntryLevel != right.mEntryLevel) { - return left.mEntryLevel > right.mEntryLevel; + if (left.mPrevWordCount != right.mPrevWordCount) { + return left.mPrevWordCount > right.mPrevWordCount; } - for (int i = 0; i < left.mEntryLevel; ++i) { + for (int i = 0; i < left.mPrevWordCount; ++i) { if (left.mPrevWordIds[i] != right.mPrevWordIds[i]) { return left.mPrevWordIds[i] < right.mPrevWordIds[i]; } @@ -392,9 +402,10 @@ bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()( } LanguageModelDictContent::EntryInfoToTurncate::EntryInfoToTurncate(const int probability, - const int timestamp, const int key, const int entryLevel, const int *const prevWordIds) - : mProbability(probability), mTimestamp(timestamp), mKey(key), mEntryLevel(entryLevel) { - memmove(mPrevWordIds, prevWordIds, mEntryLevel * sizeof(mPrevWordIds[0])); + const int timestamp, const int key, const int prevWordCount, const int *const prevWordIds) + : mProbability(probability), mTimestamp(timestamp), mKey(key), + mPrevWordCount(prevWordCount) { + memmove(mPrevWordIds, prevWordIds, mPrevWordCount * sizeof(mPrevWordIds[0])); } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h index 834cf933d..51ef090e1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -160,7 +160,7 @@ class LanguageModelDictContent { outEntryCounts[i] = 0; } return updateAllProbabilityEntriesForGCInner(mTrieMap.getRootBitmapEntryIndex(), - 0 /* level */, headerPolicy, outEntryCounts); + 0 /* prevWordCount */, headerPolicy, outEntryCounts); } // entryCounts should be created by updateAllProbabilityEntries. @@ -185,12 +185,12 @@ class LanguageModelDictContent { }; EntryInfoToTurncate(const int probability, const int timestamp, const int key, - const int entryLevel, const int *const prevWordIds); + const int prevWordCount, const int *const prevWordIds); int mProbability; int mTimestamp; int mKey; - int mEntryLevel; + int mPrevWordCount; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; private: @@ -208,7 +208,7 @@ class LanguageModelDictContent { int *const outNgramCount); int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds); int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const; - bool updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, const int level, + bool updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex, const int prevWordCount, const HeaderPolicy *const headerPolicy, int *const outEntryCounts); bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel, int *const outEntryCount); |