aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-10-21 15:46:14 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2014-10-21 15:46:14 +0900
commit e8750d970eed61b9239d8b2fa19648b8457696c1 (patch)
tree 1f57a342b4d5d4ba8838bef65a3ea29d0c651cb3
parent 1085fef8d040a6788f2185e7b03ab6b6032f321d (diff)
download latinime-e8750d970eed61b9239d8b2fa19648b8457696c1.tar.gz
latinime-e8750d970eed61b9239d8b2fa19648b8457696c1.tar.xz
latinime-e8750d970eed61b9239d8b2fa19648b8457696c1.zip
Introduce EntryCounters to count entries in a dictionary.
Bug: 14425059 Change-Id: Ic13ba827d96fa4a147485ba92fdb37e23e04e8e8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp 20
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h 34
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp 18
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h 8
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp 9
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp 25
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h 10
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp 12
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h 5
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h 119
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp 17
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h 19
16 files changed, 230 insertions, 89 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 8fb256c54..300e96c4e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -30,6 +30,7 @@ const char *const HeaderPolicy::DATE_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
+const char *const HeaderPolicy::TRIGRAM_COUNT_KEY = "TRIGRAM_COUNT";
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
// Historical info is information that is needed to support decaying such as timestamp, level and
// count.
@@ -94,12 +95,11 @@ bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
}
bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount,
- const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const {
+ const EntryCounts &entryCounts, const int extendedRegionSize,
+ BufferWithExtendableBuffer *const outBuffer) const {
int writingPos = 0;
DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap);
- fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
- extendedRegionSize, &attributeMapToWrite);
+ fillInHeader(updatesLastDecayedTime, entryCounts, extendedRegionSize, &attributeMapToWrite);
if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
&writingPos)) {
return false;
@@ -126,11 +126,15 @@ bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTim
return true;
}
-void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount,
- const int bigramCount, const int extendedRegionSize,
+void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime,
+ const EntryCounts &entryCounts, const int extendedRegionSize,
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
- HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount);
- HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount);
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY,
+ entryCounts.getUnigramCount());
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY,
+ entryCounts.getBigramCount());
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap, TRIGRAM_COUNT_KEY,
+ entryCounts.getTrigramCount());
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
extendedRegionSize);
// Set the current time as the generation time.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 836bbe5a1..44c2f443f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "utils/char_utils.h"
#include "utils/time_keeper.h"
@@ -49,6 +50,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
+ mTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ TRIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
@@ -60,6 +63,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
+ mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)),
mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
// Constructs header information using an attribute map.
@@ -77,7 +82,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
- mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
+ mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
@@ -87,6 +92,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
+ mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(
+ &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)),
mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
// Copy header information
@@ -99,12 +106,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mIsDecayingDict(headerPolicy->mIsDecayingDict),
mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
+ mTrigramCount(headerPolicy->mTrigramCount),
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
mForgettingCurveProbabilityValuesTableId(
headerPolicy->mForgettingCurveProbabilityValuesTableId),
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
mMaxBigramCount(headerPolicy->mMaxBigramCount),
+ mMaxTrigramCount(headerPolicy->mMaxTrigramCount),
mCodePointTable(headerPolicy->mCodePointTable) {}
// Temporary dummy header.
@@ -112,10 +121,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f),
mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
- mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
+ mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0),
mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0),
- mCodePointTable(nullptr) {}
+ mMaxTrigramCount(0), mCodePointTable(nullptr) {}
~HeaderPolicy() {}
@@ -183,6 +192,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mBigramCount;
}
+ AK_FORCE_INLINE int getTrigramCount() const {
+ return mTrigramCount;
+ }
+
AK_FORCE_INLINE int getExtendedRegionSize() const {
return mExtendedRegionSize;
}
@@ -212,15 +225,19 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mMaxBigramCount;
}
+ AK_FORCE_INLINE int getMaxTrigramCount() const {
+ return mMaxTrigramCount;
+ }
+
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount,
- const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const;
+ const EntryCounts &entryCounts, const int extendedRegionSize,
+ BufferWithExtendableBuffer *const outBuffer) const;
- void fillInHeader(const bool updatesLastDecayedTime,
- const int unigramCount, const int bigramCount, const int extendedRegionSize,
+ void fillInHeader(const bool updatesLastDecayedTime, const EntryCounts &entryCounts,
+ const int extendedRegionSize,
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
AK_FORCE_INLINE const std::vector<int> *getLocale() const {
@@ -245,6 +262,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const LAST_DECAYED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
+ static const char *const TRIGRAM_COUNT_KEY;
static const char *const EXTENDED_REGION_SIZE_KEY;
static const char *const HAS_HISTORICAL_INFO_KEY;
static const char *const LOCALE_KEY;
@@ -273,11 +291,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mLastDecayedTime;
const int mUnigramCount;
const int mBigramCount;
+ const int mTrigramCount;
const int mExtendedRegionSize;
const bool mHasHistoricalInfoOfWords;
const int mForgettingCurveProbabilityValuesTableId;
const int mMaxUnigramCount;
const int mMaxBigramCount;
+ const int mMaxTrigramCount;
const int *const mCodePointTable;
const std::vector<int> readLocale() const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 0eae934ae..d0dccc3be 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -303,7 +303,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
&addedNewUnigram)) {
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
- mUnigramCount++;
+ mEntryCounters.incrementUnigramCount();
}
if (unigramProperty->getShortcuts().size() > 0) {
// Add shortcut target.
@@ -397,7 +397,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos),
wordPos, ngramProperty, &addedNewBigram)) {
if (addedNewBigram) {
- mBigramCount++;
+ mEntryCounters.incrementBigramCount();
}
return true;
} else {
@@ -438,7 +438,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramCon
const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]);
if (mUpdatingHelper.removeNgramEntry(
PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos)) {
- mBigramCount--;
+ mEntryCounters.decrementBigramCount();
return true;
} else {
return false;
@@ -477,7 +477,7 @@ bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
return false;
}
- if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
+ if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) {
AKLOGE("Cannot flush the dictionary to file.");
mIsCorrupted = true;
return false;
@@ -515,7 +515,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy->isDecayingDict()) {
- return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
+ return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
mHeaderPolicy);
}
return false;
@@ -525,19 +525,19 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
char *const outResult, const int maxResultLength) {
const int compareLength = queryLength + 1 /* terminator */;
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+ snprintf(outResult, maxResultLength, "%d", mEntryCounters.getUnigramCount());
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mBigramCount);
+ snprintf(outResult, maxResultLength, "%d", mEntryCounters.getBigramCount());
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ?
- ForgettingCurveUtils::getUnigramCountHardLimit(
+ ForgettingCurveUtils::getEntryCountHardLimit(
mHeaderPolicy->getMaxUnigramCount()) :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ?
- ForgettingCurveUtils::getBigramCountHardLimit(
+ ForgettingCurveUtils::getEntryCountHardLimit(
mHeaderPolicy->getMaxBigramCount()) :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index 1ad5e7e36..2cda0d3fa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -41,6 +41,7 @@
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
#include "utils/int_array_view.h"
namespace latinime {
@@ -75,8 +76,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
- mUnigramCount(mHeaderPolicy->getUnigramCount()),
- mBigramCount(mHeaderPolicy->getBigramCount()),
+ mEntryCounters(mHeaderPolicy->getUnigramCount(), mHeaderPolicy->getBigramCount(),
+ mHeaderPolicy->getTrigramCount()),
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
virtual int getRootPosition() const {
@@ -163,8 +164,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieNodeWriter mNodeWriter;
DynamicPtUpdatingHelper mUpdatingHelper;
Ver4PatriciaTrieWritingHelper mWritingHelper;
- int mUnigramCount;
- int mBigramCount;
+ MutableEntryCounters mEntryCounters;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
index 2887dc6b1..5f440819a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
@@ -43,18 +43,18 @@ namespace backward {
namespace v402 {
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
- const int unigramCount, const int bigramCount) const {
+ const EntryCounts &entryCounts) const {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
- unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
+ entryCounts, extendedRegionSize, &headerBuffer)) {
AKLOGE("Cannot write header structure to buffer. "
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
- "extendedRegionSize: %d", false, unigramCount, bigramCount,
- extendedRegionSize);
+ "extendedRegionSize: %d", false, entryCounts.getUnigramCount(),
+ entryCounts.getBigramCount(), extendedRegionSize);
return false;
}
return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
@@ -74,7 +74,8 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
- unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
+ EntryCounts(unigramCount, bigramCount, 0 /* trigramCount */),
+ 0 /* extendedRegionSize */, &headerBuffer)) {
return false;
}
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
index 9034ee656..1aad33e38 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h
@@ -27,6 +27,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
namespace latinime {
namespace backward {
@@ -46,8 +47,7 @@ class Ver4PatriciaTrieWritingHelper {
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {}
- bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
- const int bigramCount) const;
+ bool writeToDictFile(const char *const dictDirPath, const EntryCounts &entryCounts) const;
// This method cannot be const because the original dictionary buffer will be updated to detect
// useless PtNodes during GC.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index c4297f5d6..934c4f470 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -161,10 +161,7 @@ bool LanguageModelDictContent::truncateEntries(const int *const entryCounts,
bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds,
const int wordId, const bool isValid, const HistoricalInfo historicalInfo,
- const HeaderPolicy *const headerPolicy, int *const outAddedNewNgramEntryCount) {
- if (outAddedNewNgramEntryCount) {
- *outAddedNewNgramEntryCount = 0;
- }
+ const HeaderPolicy *const headerPolicy, MutableEntryCounters *const entryCountersToUpdate) {
if (!mHasHistoricalInfo) {
AKLOGE("updateAllEntriesOnInputWord is called for dictionary without historical info.");
return false;
@@ -188,8 +185,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
return false;
}
- if (!originalNgramProbabilityEntry.isValid() && outAddedNewNgramEntryCount) {
- *outAddedNewNgramEntryCount += 1;
+ if (!originalNgramProbabilityEntry.isValid()) {
+ entryCountersToUpdate->incrementNgramCount(i + 2);
}
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 51ef090e1..9a5f87741 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -25,6 +25,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
#include "suggest/policyimpl/dictionary/utils/trie_map.h"
#include "utils/byte_array_view.h"
#include "utils/int_array_view.h"
@@ -169,7 +170,8 @@ class LanguageModelDictContent {
bool updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds, const int wordId,
const bool isValid, const HistoricalInfo historicalInfo,
- const HeaderPolicy *const headerPolicy, int *const outAddedNewNgramEntryCount);
+ const HeaderPolicy *const headerPolicy,
+ MutableEntryCounters *const entryCountersToUpdate);
private:
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index ea8c0dc22..ead1bde50 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -211,7 +211,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
&addedNewUnigram)) {
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
- mUnigramCount++;
+ mEntryCounters.incrementUnigramCount();
}
if (unigramProperty->getShortcuts().size() > 0) {
// Add shortcut target.
@@ -259,7 +259,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return false;
}
if (!ptNodeParams.representsNonWordInfo()) {
- mUnigramCount--;
+ mEntryCounters.decrementUnigramCount();
}
return true;
}
@@ -316,7 +316,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
bool addedNewEntry = false;
if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) {
if (addedNewEntry) {
- mBigramCount++;
+ mEntryCounters.incrementNgramCount(prevWordIds.size() + 1);
}
return true;
} else {
@@ -354,7 +354,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramCon
return false;
}
if (mNodeWriter.removeNgramEntry(prevWordIds, wordId)) {
- mBigramCount--;
+ mEntryCounters.decrementNgramCount(prevWordIds.size() + 1);
return true;
} else {
return false;
@@ -401,12 +401,10 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
// Refresh word ids.
ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
}
- int addedNewNgramEntryCount = 0;
if (!mBuffers->getMutableLanguageModelDictContent()->updateAllEntriesOnInputWord(prevWordIds,
- wordId, updateAsAValidWord, historicalInfo, mHeaderPolicy, &addedNewNgramEntryCount)) {
+ wordId, updateAsAValidWord, historicalInfo, mHeaderPolicy, &mEntryCounters)) {
return false;
}
- mBigramCount += addedNewNgramEntryCount;
return true;
}
@@ -415,7 +413,7 @@ bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
return false;
}
- if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
+ if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) {
AKLOGE("Cannot flush the dictionary to file.");
mIsCorrupted = true;
return false;
@@ -453,8 +451,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy->isDecayingDict()) {
- return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
- mHeaderPolicy);
+ return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy);
}
return false;
}
@@ -463,19 +460,19 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
char *const outResult, const int maxResultLength) {
const int compareLength = queryLength + 1 /* terminator */;
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+ snprintf(outResult, maxResultLength, "%d", mEntryCounters.getUnigramCount());
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
- snprintf(outResult, maxResultLength, "%d", mBigramCount);
+ snprintf(outResult, maxResultLength, "%d", mEntryCounters.getBigramCount());
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ?
- ForgettingCurveUtils::getUnigramCountHardLimit(
+ ForgettingCurveUtils::getEntryCountHardLimit(
mHeaderPolicy->getMaxUnigramCount()) :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ?
- ForgettingCurveUtils::getBigramCountHardLimit(
+ ForgettingCurveUtils::getEntryCountHardLimit(
mHeaderPolicy->getMaxBigramCount()) :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index c0532815c..e3611cb32 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -30,6 +30,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
#include "utils/int_array_view.h"
namespace latinime {
@@ -37,7 +38,6 @@ namespace latinime {
class DicNode;
class DicNodeVector;
-// TODO: Support counting ngram entries.
// Word id = Artificial id that is stored in the PtNode looked up by the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
@@ -51,8 +51,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
&mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
- mUnigramCount(mHeaderPolicy->getUnigramCount()),
- mBigramCount(mHeaderPolicy->getBigramCount()),
+ mEntryCounters(mHeaderPolicy->getUnigramCount(), mHeaderPolicy->getBigramCount(),
+ mHeaderPolicy->getTrigramCount()),
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
AK_FORCE_INLINE int getRootPosition() const {
@@ -141,9 +141,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieNodeWriter mNodeWriter;
DynamicPtUpdatingHelper mUpdatingHelper;
Ver4PatriciaTrieWritingHelper mWritingHelper;
- int mUnigramCount;
- // TODO: Support counting ngram entries.
- int mBigramCount;
+ MutableEntryCounters mEntryCounters;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index f0d59c150..e49d0308e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -33,17 +33,18 @@
namespace latinime {
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
- const int unigramCount, const int bigramCount) const {
+ const EntryCounts &entryCounts) const {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
- unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
+ entryCounts, extendedRegionSize, &headerBuffer)) {
AKLOGE("Cannot write header structure to buffer. "
- "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
- "extendedRegionSize: %d", false, unigramCount, bigramCount,
+ "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, trigramCount: %d, "
+ "extendedRegionSize: %d", false, entryCounts.getUnigramCount(),
+ entryCounts.getBigramCount(), entryCounts.getTrigramCount(),
extendedRegionSize);
return false;
}
@@ -64,7 +65,8 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
- unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
+ EntryCounts(unigramCount, bigramCount, 0 /* trigramCount */),
+ 0 /* extendedRegionSize */, &headerBuffer)) {
return false;
}
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
index 3569d0576..57a1f7bb1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
namespace latinime {
@@ -33,9 +34,7 @@ class Ver4PatriciaTrieWritingHelper {
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {}
- // TODO: Support counting ngram entries.
- bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
- const int bigramCount) const;
+ bool writeToDictFile(const char *const dictDirPath, const EntryCounts &entryCounts) const;
// This method cannot be const because the original dictionary buffer will be updated to detect
// useless PtNodes during GC.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index b7e2a7278..9d8e86675 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -27,6 +27,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "utils/time_keeper.h"
@@ -69,8 +70,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
DictBuffersPtr dictBuffers = DictBuffers::createVer4DictBuffers(&headerPolicy,
DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
headerPolicy.fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
- 0 /* unigramCount */, 0 /* bigramCount */,
- 0 /* extendedRegionSize */, dictBuffers->getWritableHeaderBuffer());
+ EntryCounts(), 0 /* extendedRegionSize */, dictBuffers->getWritableHeaderBuffer());
if (!DynamicPtWritingUtils::writeEmptyDictionary(
dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h
new file mode 100644
index 000000000..b8fa5aa9e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_ENTRY_COUNTERS_H
+#define LATINIME_ENTRY_COUNTERS_H
+
+#include <array>
+
+#include "defines.h"
+
+namespace latinime {
+
+// Copyable but immutable snapshot of the unigram, bigram and trigram entry counts. The counts
+// are fixed when the object is constructed.
+class EntryCounts final {
+ public:
+    // Creates a snapshot in which all three counts are zero.
+    EntryCounts() : EntryCounts(0, 0, 0) {}
+
+    // Creates a snapshot holding the given counts.
+    EntryCounts(const int unigramCount, const int bigramCount, const int trigramCount)
+            : mEntryCounts{{unigramCount, bigramCount, trigramCount}} {}
+
+    // Creates a snapshot by copying an array of counts laid out like mEntryCounts.
+    explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters)
+            : mEntryCounts(counters) {}
+
+    int getUnigramCount() const { return std::get<0>(mEntryCounts); }
+
+    int getBigramCount() const { return std::get<1>(mEntryCounts); }
+
+    int getTrigramCount() const { return std::get<2>(mEntryCounts); }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(EntryCounts);
+
+    // Slots 0, 1 and 2 hold the unigram, bigram and trigram counts respectively. The member
+    // is const, so the values are fixed at construction.
+    const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> mEntryCounts;
+};
+
+// Mutable holder for the unigram, bigram and trigram entry counts. Each counter can be moved
+// up or down by one, and getEntryCounts() produces an EntryCounts from the current values.
+class MutableEntryCounters final {
+ public:
+    // Starts with every counter at zero.
+    MutableEntryCounters() : mEntryCounters() {}
+
+    // Starts from the given counts.
+    MutableEntryCounters(const int unigramCount, const int bigramCount, const int trigramCount)
+            : mEntryCounters{{unigramCount, bigramCount, trigramCount}} {}
+
+    // Returns an EntryCounts constructed from the current counter values.
+    const EntryCounts getEntryCounts() const {
+        return EntryCounts(mEntryCounters);
+    }
+
+    int getUnigramCount() const { return std::get<0>(mEntryCounters); }
+    int getBigramCount() const { return std::get<1>(mEntryCounters); }
+    int getTrigramCount() const { return std::get<2>(mEntryCounters); }
+
+    void incrementUnigramCount() {
+        mEntryCounters[0] += 1;
+    }
+
+    void decrementUnigramCount() {
+        ASSERT(mEntryCounters[0] != 0);
+        mEntryCounters[0] -= 1;
+    }
+
+    void incrementBigramCount() {
+        mEntryCounters[1] += 1;
+    }
+
+    void decrementBigramCount() {
+        ASSERT(mEntryCounters[1] != 0);
+        mEntryCounters[1] -= 1;
+    }
+
+    // Adds one to the counter for n-grams, with n counted from 1 (1 selects the unigram
+    // counter). Does nothing when n does not select a counter.
+    void incrementNgramCount(const size_t n) {
+        const bool selectsCounter = n >= 1 && n <= mEntryCounters.size();
+        if (selectsCounter) {
+            ++mEntryCounters[n - 1];
+        }
+    }
+
+    // Subtracts one from the counter for n-grams, with n counted from 1. Does nothing when n
+    // does not select a counter.
+    void decrementNgramCount(const size_t n) {
+        const bool selectsCounter = n >= 1 && n <= mEntryCounters.size();
+        if (selectsCounter) {
+            ASSERT(mEntryCounters[n - 1] != 0);
+            --mEntryCounters[n - 1];
+        }
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(MutableEntryCounters);
+
+    // Slots 0, 1 and 2 hold the unigram, bigram and trigram counts respectively.
+    std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> mEntryCounters;
+};
+} // namespace latinime
+#endif /* LATINIME_ENTRY_COUNTERS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index e5ef2abf8..9055f7bfc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -38,8 +38,7 @@ const int ForgettingCurveUtils::OCCURRENCES_TO_RAISE_THE_LEVEL = 1;
// 15 days
const int ForgettingCurveUtils::DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS = 15 * 24 * 60 * 60;
-const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
-const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
+const float ForgettingCurveUtils::ENTRY_COUNT_HARD_LIMIT_WEIGHT = 1.2;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
@@ -126,14 +125,22 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
- const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
- if (unigramCount >= getUnigramCountHardLimit(headerPolicy->getMaxUnigramCount())) {
+ const EntryCounts &entryCounts, const HeaderPolicy *const headerPolicy) {
+ if (entryCounts.getUnigramCount()
+ >= getEntryCountHardLimit(headerPolicy->getMaxUnigramCount())) {
// Unigram count exceeds the limit.
return true;
- } else if (bigramCount >= getBigramCountHardLimit(headerPolicy->getMaxBigramCount())) {
+ }
+ if (entryCounts.getBigramCount()
+ >= getEntryCountHardLimit(headerPolicy->getMaxBigramCount())) {
// Bigram count exceeds the limit.
return true;
}
+ if (entryCounts.getTrigramCount()
+ >= getEntryCountHardLimit(headerPolicy->getMaxTrigramCount())) {
+ // Trigram count exceeds the limit.
+ return true;
+ }
if (mindsBlockByDecay) {
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index ccbc4a98d..06dcae8a1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/property/historical_info.h"
+#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
namespace latinime {
@@ -42,22 +43,17 @@ class ForgettingCurveUtils {
static bool needsToKeep(const HistoricalInfo *const historicalInfo,
const HeaderPolicy *const headerPolicy);
- static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
- const int bigramCount, const HeaderPolicy *const headerPolicy);
+ static bool needsToDecay(const bool mindsBlockByDecay, const EntryCounts &entryCounters,
+ const HeaderPolicy *const headerPolicy);
// TODO: Improve probability computation method and remove this.
static int getProbabilityBiasForNgram(const int n) {
return (n - 1) * MULTIPLIER_TWO_IN_PROBABILITY_SCALE;
}
- AK_FORCE_INLINE static int getUnigramCountHardLimit(const int maxUnigramCount) {
- return static_cast<int>(static_cast<float>(maxUnigramCount)
- * UNIGRAM_COUNT_HARD_LIMIT_WEIGHT);
- }
-
- AK_FORCE_INLINE static int getBigramCountHardLimit(const int maxBigramCount) {
- return static_cast<int>(static_cast<float>(maxBigramCount)
- * BIGRAM_COUNT_HARD_LIMIT_WEIGHT);
+ AK_FORCE_INLINE static int getEntryCountHardLimit(const int maxEntryCount) {
+ return static_cast<int>(static_cast<float>(maxEntryCount)
+ * ENTRY_COUNT_HARD_LIMIT_WEIGHT);
}
private:
@@ -101,8 +97,7 @@ class ForgettingCurveUtils {
static const int OCCURRENCES_TO_RAISE_THE_LEVEL;
static const int DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS;
- static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT;
- static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT;
+ static const float ENTRY_COUNT_HARD_LIMIT_WEIGHT;
static const ProbabilityTable sProbabilityTable;