aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/suggest/policyimpl/dictionary/header
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src/suggest/policyimpl/dictionary/header')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h81
2 files changed, 59 insertions, 80 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 300e96c4e..a2a0f11b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -18,6 +18,8 @@
#include <algorithm>
+#include "utils/ngram_utils.h"
+
namespace latinime {
// Note that these are corresponding definitions in Java side in DictionaryHeader.
@@ -28,9 +30,11 @@ const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::DATE_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
-const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
-const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
-const char *const HeaderPolicy::TRIGRAM_COUNT_KEY = "TRIGRAM_COUNT";
+const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
+ {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"};
+const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
+ {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"};
+const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000};
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
// Historical info is information that is needed to support decaying such as timestamp, level and
// count.
@@ -39,18 +43,10 @@ const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
"FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
-const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT";
-const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT";
-const char *const HeaderPolicy::MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT";
-
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
-const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
-const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 30000;
-const int HeaderPolicy::DEFAULT_MAX_TRIGRAM_COUNT = 30000;
-
// Used for logging. Question mark is used to indicate that the key is not found.
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const {
@@ -126,15 +122,22 @@ bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTim
return true;
}
+namespace {
+
+int getIndexFromNgramType(const NgramType ngramType) {
+ return static_cast<int>(ngramType);
+}
+
+} // namespace
+
void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime,
const EntryCounts &entryCounts, const int extendedRegionSize,
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
- HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY,
- entryCounts.getUnigramCount());
- HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY,
- entryCounts.getBigramCount());
- HeaderReadWriteUtils::setIntAttribute(outAttributeMap, TRIGRAM_COUNT_KEY,
- entryCounts.getTrigramCount());
+ for (const auto ngramType : AllNgramTypes::ASCENDING) {
+ HeaderReadWriteUtils::setIntAttribute(outAttributeMap,
+ NGRAM_COUNT_KEYS[getIndexFromNgramType(ngramType)],
+ entryCounts.getNgramCount(ngramType));
+ }
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
extendedRegionSize);
// Set the current time as the generation time.
@@ -155,4 +158,25 @@ void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime,
return attributeMap;
}
+/* static */ const EntryCounts HeaderPolicy::readNgramCounts() const {
+ MutableEntryCounters entryCounters;
+ for (const auto ngramType : AllNgramTypes::ASCENDING) {
+ const int entryCount = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ NGRAM_COUNT_KEYS[getIndexFromNgramType(ngramType)], 0 /* defaultValue */);
+ entryCounters.setNgramCount(ngramType, entryCount);
+ }
+ return entryCounters.getEntryCounts();
+}
+
+/* static */ const EntryCounts HeaderPolicy::readMaxNgramCounts() const {
+ MutableEntryCounters entryCounters;
+ for (const auto ngramType : AllNgramTypes::ASCENDING) {
+ const int index = getIndexFromNgramType(ngramType);
+ const int maxEntryCount = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ MAX_NGRAM_COUNT_KEYS[index], DEFAULT_MAX_NGRAM_COUNTS[index]);
+ entryCounters.setNgramCount(ngramType, maxEntryCount);
+ }
+ return entryCounters.getEntryCounts();
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 7a5acd7d5..f76931baa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -46,12 +46,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
- mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
- mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
- mTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- TRIGRAM_COUNT_KEY, 0 /* defaultValue */)),
+ mNgramCounts(readNgramCounts()), mMaxNgramCounts(readMaxNgramCounts()),
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
@@ -59,12 +54,6 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
- mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
- mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
- mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)),
mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
// Constructs header information using an attribute map.
@@ -82,18 +71,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
- mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0),
+ mNgramCounts(readNgramCounts()), mMaxNgramCounts(readMaxNgramCounts()),
+ mExtendedRegionSize(0),
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
- mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
- mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
- mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(
- &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)),
mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
// Copy header information
@@ -105,15 +89,12 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing),
mIsDecayingDict(headerPolicy->mIsDecayingDict),
mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
- mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
- mTrigramCount(headerPolicy->mTrigramCount),
+ mNgramCounts(headerPolicy->mNgramCounts),
+ mMaxNgramCounts(headerPolicy->mMaxNgramCounts),
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
mForgettingCurveProbabilityValuesTableId(
headerPolicy->mForgettingCurveProbabilityValuesTableId),
- mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
- mMaxBigramCount(headerPolicy->mMaxBigramCount),
- mMaxTrigramCount(headerPolicy->mMaxTrigramCount),
mCodePointTable(headerPolicy->mCodePointTable) {}
// Temporary dummy header.
@@ -121,10 +102,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f),
mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
- mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0),
+ mDate(0), mLastDecayedTime(0), mNgramCounts(), mMaxNgramCounts(),
mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
- mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0),
- mMaxTrigramCount(0), mCodePointTable(nullptr) {}
+ mForgettingCurveProbabilityValuesTableId(0), mCodePointTable(nullptr) {}
~HeaderPolicy() {}
@@ -186,16 +166,12 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mLastDecayedTime;
}
- AK_FORCE_INLINE int getUnigramCount() const {
- return mUnigramCount;
+ AK_FORCE_INLINE const EntryCounts &getNgramCounts() const {
+ return mNgramCounts;
}
- AK_FORCE_INLINE int getBigramCount() const {
- return mBigramCount;
- }
-
- AK_FORCE_INLINE int getTrigramCount() const {
- return mTrigramCount;
+ AK_FORCE_INLINE const EntryCounts getMaxNgramCounts() const {
+ return mMaxNgramCounts;
}
AK_FORCE_INLINE int getExtendedRegionSize() const {
@@ -219,18 +195,6 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mForgettingCurveProbabilityValuesTableId;
}
- AK_FORCE_INLINE int getMaxUnigramCount() const {
- return mMaxUnigramCount;
- }
-
- AK_FORCE_INLINE int getMaxBigramCount() const {
- return mMaxBigramCount;
- }
-
- AK_FORCE_INLINE int getMaxTrigramCount() const {
- return mMaxTrigramCount;
- }
-
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
@@ -262,24 +226,18 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const IS_DECAYING_DICT_KEY;
static const char *const DATE_KEY;
static const char *const LAST_DECAYED_TIME_KEY;
- static const char *const UNIGRAM_COUNT_KEY;
- static const char *const BIGRAM_COUNT_KEY;
- static const char *const TRIGRAM_COUNT_KEY;
+ static const char *const NGRAM_COUNT_KEYS[];
+ static const char *const MAX_NGRAM_COUNT_KEYS[];
+ static const int DEFAULT_MAX_NGRAM_COUNTS[];
static const char *const EXTENDED_REGION_SIZE_KEY;
static const char *const HAS_HISTORICAL_INFO_KEY;
static const char *const LOCALE_KEY;
static const char *const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY;
static const char *const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY;
static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY;
- static const char *const MAX_UNIGRAM_COUNT_KEY;
- static const char *const MAX_BIGRAM_COUNT_KEY;
- static const char *const MAX_TRIGRAM_COUNT_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
- static const int DEFAULT_MAX_UNIGRAM_COUNT;
- static const int DEFAULT_MAX_BIGRAM_COUNT;
- static const int DEFAULT_MAX_TRIGRAM_COUNT;
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
@@ -291,21 +249,18 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const bool mIsDecayingDict;
const int mDate;
const int mLastDecayedTime;
- const int mUnigramCount;
- const int mBigramCount;
- const int mTrigramCount;
+ const EntryCounts mNgramCounts;
+ const EntryCounts mMaxNgramCounts;
const int mExtendedRegionSize;
const bool mHasHistoricalInfoOfWords;
const int mForgettingCurveProbabilityValuesTableId;
- const int mMaxUnigramCount;
- const int mMaxBigramCount;
- const int mMaxTrigramCount;
const int *const mCodePointTable;
const std::vector<int> readLocale() const;
float readMultipleWordCostMultiplier() const;
bool readRequiresGermanUmlautProcessing() const;
-
+ const EntryCounts readNgramCounts() const;
+ const EntryCounts readMaxNgramCounts() const;
static DictionaryHeaderStructurePolicy::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
};