diff options
5 files changed, 178 insertions, 61 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 439c3de1d..196da5c97 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -21,6 +21,8 @@ namespace latinime { const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; +const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; +const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; @@ -49,24 +51,47 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - std::vector<int> multipleWordsDemotionRateKeyVector; - insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, - &multipleWordsDemotionRateKeyVector); - HeaderReadingUtils::AttributeMap::const_iterator it = - mAttributeMap.find(multipleWordsDemotionRateKeyVector); - if (it == mAttributeMap.end()) { - // The key was not found. + int attributeValue = 0; + if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { + if (attributeValue <= 0) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); + } + return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue); + } else { return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; } - const int headerValue = parseIntAttributeValue(&(it->second)); - if (headerValue == S_INT_MIN) { - // Invalid value - return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; +} + +bool HeaderPolicy::readUsesForgettingCurveFlag() const { + int attributeValue = 0; + if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) { + return attributeValue != 0; + } else { + return false; } - if (headerValue <= 0) { - return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); +} + +// Returns S_INT_MIN when the key is not found or the value is invalid. +int HeaderPolicy::readLastUpdatedTime() const { + int attributeValue = 0; + if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) { + return attributeValue; + } else { + return S_INT_MIN; + } +} + +// Returns whether the key is found or not and stores the found value into outValue. +bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { + std::vector<int> keyVector; + insertCharactersIntoVector(key, &keyVector); + HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); + if (it == mAttributeMap.end()) { + // The key was not found. + return false; } - return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); + *outValue = parseIntAttributeValue(&(it->second)); + return true; } /* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes( diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 571ff9e69..930b475c7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -32,7 +32,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), - mMultiWordCostMultiplier(readMultipleWordCostMultiplier()) {} + mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), + mUsesForgettingCurve(readUsesForgettingCurveFlag()), + mLastUpdatedTime(readLastUpdatedTime()) {} ~HeaderPolicy() {} @@ -57,6 +59,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMultiWordCostMultiplier; } + AK_FORCE_INLINE bool usesForgettingCurve() const { + return mUsesForgettingCurve; + } + + AK_FORCE_INLINE int getLastUpdatedTime() const { + return mLastUpdatedTime; + } + void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; @@ -64,6 +74,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; + static const char *const USES_FORGETTING_CURVE_KEY; + static const char *const LAST_UPDATED_TIME_KEY; static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; @@ -72,9 +84,17 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mSize; HeaderReadingUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; + const bool mUsesForgettingCurve; + const int mLastUpdatedTime; float readMultipleWordCostMultiplier() const; + bool readUsesForgettingCurveFlag() const; + + int readLastUpdatedTime() const; + + bool getAttributeValueAsInt(const char *const key, int *const outValue) const; + static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 8582c4b81..6326754c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -22,4 +22,70 @@ const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1 const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024; +bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, + int *const pos) { + if (!(size >= 1 && size <= 4)) { + AKLOGI("writeUintAndAdvancePosition() is called with invalid size: %d", size); + ASSERT(false); + return false; + } + if (!checkAndPrepareWriting(*pos, size)) { + return false; + } + const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos); + uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer; + if (usesAdditionalBuffer) { + *pos -= mOriginalBufferSize; + } + ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos); + if (usesAdditionalBuffer) { + *pos += mOriginalBufferSize; + } + return true; +} + +bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints, + const int codePointCount, const bool writesTerminator ,int *const pos) { + const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints( + codePoints, codePointCount, writesTerminator); + if (!checkAndPrepareWriting(*pos, size)) { + return false; + } + const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos); + uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer; + if (usesAdditionalBuffer) { + *pos -= mOriginalBufferSize; + } + ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePoints, codePointCount, + writesTerminator, pos); + if (usesAdditionalBuffer) { + *pos += mOriginalBufferSize; + } + return true; +} + +bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) { + if (isInAdditionalBuffer(pos)) { + if (pos == mUsedAdditionalBufferSize) { + // Append data to the tail. + if (pos + size > static_cast<int>(mAdditionalBuffer.size())) { + // Need to extend buffer. + if (!extendBuffer()) { + return false; + } + } + mUsedAdditionalBufferSize += size; + } else if (pos + size >= mUsedAdditionalBufferSize) { + // The access will beyond the tail of used region. + return false; + } + } else { + if (pos < 0 || mOriginalBufferSize < pos + size) { + // Invalid position or violate the boundary. + return false; + } + } + return true; +} + } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index ec871ec85..b35b47d7a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -66,27 +66,10 @@ class BufferWithExtendableBuffer { * Writing is allowed for original buffer, already written region of additional buffer and the * tail of additional buffer. */ - AK_FORCE_INLINE bool writeUintAndAdvancePosition(const uint32_t data, const int size, - int *const pos) { - if (!(size >= 1 && size <= 4)) { - AKLOGI("writeUintAndAdvancePosition() is called with invalid size: %d", size); - ASSERT(false); - return false; - } - if (!checkAndPrepareWriting(*pos, size)) { - return false; - } - const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos); - uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer; - if (usesAdditionalBuffer) { - *pos -= mOriginalBufferSize; - } - ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos); - if (usesAdditionalBuffer) { - *pos += mOriginalBufferSize; - } - return true; - } + bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos); + + bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount, + const bool writesTerminator, int *const pos); private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); @@ -112,29 +95,7 @@ class BufferWithExtendableBuffer { // Returns if it is possible to write size-bytes from pos. When pos is at the tail position of // the additional buffer, try extending the buffer. - AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size) { - if (isInAdditionalBuffer(pos)) { - if (pos == mUsedAdditionalBufferSize) { - // Append data to the tail. - if (pos + size > static_cast<int>(mAdditionalBuffer.size())) { - // Need to extend buffer. - if (!extendBuffer()) { - return false; - } - } - mUsedAdditionalBufferSize += size; - } else if (pos + size >= mUsedAdditionalBufferSize) { - // The access will beyond the tail of used region. - return false; - } - } else { - if (pos < 0 || mOriginalBufferSize < pos + size) { - // Invalid position or violate the boundary. - return false; - } - } - return true; - } + AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size); }; } #endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index e2cb9a065..f727ecf8e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -115,7 +115,7 @@ class ByteArrayUtils { } /** - * Code Point + * Code Point Reading * * 1 byte = bbbbbbbb match * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte @@ -149,7 +149,7 @@ class ByteArrayUtils { } /** - * String (array of code points) + * String (array of code points) Reading * * Reads code points until the terminator is found. */ @@ -176,6 +176,51 @@ class ByteArrayUtils { return length; } + /** + * String (array of code points) Writing + */ + static void writeCodePointsAndAdvancePosition(uint8_t *const buffer, + const int *const codePoints, const int codePointCount, const bool writesTerminator, + int *const pos) { + for (int i = 0; i < codePointCount; ++i) { + const int codePoint = codePoints[i]; + if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { + break; + } else if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { + // three bytes character. + writeUint24AndAdvancePosition(buffer, codePoint, pos); + } else { + // one byte character. + writeUint8AndAdvancePosition(buffer, codePoint, pos); + } + } + if (writesTerminator) { + writeUint8AndAdvancePosition(buffer, CHARACTER_ARRAY_TERMINATOR, pos); + } + } + + static int calculateRequiredByteCountToStoreCodePoints(const int *const codePoints, + const int codePointCount, const bool writesTerminator) { + int byteCount = 0; + for (int i = 0; i < codePointCount; ++i) { + const int codePoint = codePoints[i]; + if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) { + break; + } else if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { + // three bytes character. + byteCount += 3; + } else { + // one byte character. + byteCount += 1; + } + } + if (writesTerminator) { + // The terminator is one byte. + byteCount += 1; + } + return byteCount; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); |