diff options
author | 2013-12-13 17:09:16 +0900 | |
---|---|---|
committer | 2013-12-13 17:13:32 +0900 | |
commit | 2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc (patch) | |
tree | 56a5652edf71dd19d04161f72e3e013608cc2a9c /native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h | |
parent | 18d033405c18a8dc28f60ca22d1d0df23a679384 (diff) | |
download | latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.tar.gz latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.tar.xz latinime-2fa3693c264a4c150ac307d9bb7f6f8f18cc4ffc.zip |
Reset to 9bd6dac4708ad94fd0257c53e977df62b152e20c
The bulk merge from -bayo to klp-dev should not have been merged to master.
Change-Id: I527a03a76f5247e4939a672f27c314dc11cbb854
Diffstat (limited to 'native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h')
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h | 81 |
1 files changed, 61 insertions, 20 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index a9c7805a8..1208d2c2a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,37 +17,40 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include <ctime> #include <stdint.h> #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" +#include "utils/time_keeper.h" namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: // Reads information from existing dictionary buffer. - HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion) + : mDictFormatVersion(formatVersion), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), + mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)), + LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, BIGRAM_COUNT_KEY, 0 /* defaultValue */)), mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {} + EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), + mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( + &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {} // Constructs header information using an attribute map. HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, @@ -56,30 +59,55 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( attributeMap)), mSize(0), mAttributeMap(*attributeMap), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), + mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, IS_DECAYING_DICT_KEY, false /* defaultValue */)), mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), + LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, - LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), - mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} + LAST_UPDATED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), + mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), + mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( + &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)) {} + + // Temporary dummy header. + HeaderPolicy() + : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0), + mAttributeMap(), mMultiWordCostMultiplier(0.0f), + mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), + mLastUpdatedTime(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), + mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false) {} ~HeaderPolicy() {} - AK_FORCE_INLINE int getSize() const { - return mSize; - } - - AK_FORCE_INLINE bool supportsDynamicUpdate() const { - return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags); + virtual int getFormatVersionNumber() const { + // Conceptually this converts the symbolic value we use in the code into the + // hardcoded of the bytes in the file. But we want the constants to be the + // same so we use them for both here. + switch (mDictFormatVersion) { + case FormatUtils::VERSION_2: + return FormatUtils::VERSION_2; + case FormatUtils::VERSION_4: + return FormatUtils::VERSION_4; + default: + return FormatUtils::UNKNOWN_VERSION; + } } - AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { - return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags); + AK_FORCE_INLINE bool isValid() const { + // Decaying dictionary must have historical information. + if (!mIsDecayingDict) { + return true; + } + if (mHasHistoricalInfoOfWords) { + return true; + } else { + return false; + } } - AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { - return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags); + AK_FORCE_INLINE int getSize() const { + return mSize; } AK_FORCE_INLINE float getMultiWordCostMultiplier() const { @@ -90,6 +118,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mIsDecayingDict; } + AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { + return mRequiresGermanUmlautProcessing; + } + AK_FORCE_INLINE int getLastUpdatedTime() const { return mLastUpdatedTime; } @@ -110,6 +142,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mExtendedRegionSize; } + AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const { + return mHasHistoricalInfoOfWords; + } + void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; @@ -118,15 +154,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int unigramCount, const int bigramCount, const int extendedRegionSize) const; private: - DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); + DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; static const char *const IS_DECAYING_DICT_KEY; static const char *const LAST_UPDATED_TIME_KEY; static const char *const LAST_DECAYED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; static const char *const EXTENDED_REGION_SIZE_KEY; + static const char *const HAS_HISTORICAL_INFO_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; @@ -135,14 +173,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mSize; HeaderReadWriteUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; + const bool mRequiresGermanUmlautProcessing; const bool mIsDecayingDict; const int mLastUpdatedTime; const int mLastDecayedTime; const int mUnigramCount; const int mBigramCount; const int mExtendedRegionSize; + const bool mHasHistoricalInfoOfWords; float readMultipleWordCostMultiplier() const; + bool readRequiresGermanUmlautProcessing() const; static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); |