diff options
Diffstat (limited to 'native')
30 files changed, 786 insertions, 270 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index c2070327e..0594ddff0 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ + dict_file_writing_utils.cpp \ format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 7f47493b2..7761ec4d5 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -26,12 +26,55 @@ #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" namespace latinime { class ProximityInfo; +// TODO: Move to makedict. +static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz, + jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray, + jobjectArray attributeValueStringArray) { + const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); + char filePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); + filePathChars[filePathUtf8Length] = '\0'; + + const int keyCount = env->GetArrayLength(attributeKeyStringArray); + const int valueCount = env->GetArrayLength(attributeValueStringArray); + if (keyCount != valueCount) { + return false; + } + + HeaderReadWriteUtils::AttributeMap attributeMap; + for (int i = 0; i < keyCount; i++) { + jstring keyString = static_cast<jstring>( + env->GetObjectArrayElement(attributeKeyStringArray, i)); + const jsize keyUtf8Length = env->GetStringUTFLength(keyString); + char keyChars[keyUtf8Length + 1]; + env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); + keyChars[keyUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::key_type key; + HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); + + jstring valueString = static_cast<jstring>( + env->GetObjectArrayElement(attributeValueStringArray, i)); + const jsize valueUtf8Length = env->GetStringUTFLength(valueString); + char valueChars[valueUtf8Length + 1]; + env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); + valueChars[valueUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::mapped_type value; + HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); + + attributeMap[key] = value; + } + + return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion), + &attributeMap); +} + static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { PROF_OPEN; @@ -282,6 +325,11 @@ static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jcl static const JNINativeMethod sMethods[] = { { + const_cast<char *>("createEmptyDictFileNative"), + const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile) + }, + { const_cast<char *>("openNative"), const_cast<char *>("(Ljava/lang/String;JJZ)J"), reinterpret_cast<void *>(latinime_BinaryDictionary_open) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index f8ed2b9aa..1926b9831 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -78,8 +79,10 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos); break; } - if (offset == 0) { + if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) { return NOT_A_DICT_POS; + } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) { + return origin; } if (isOffsetNegative(flags)) { return origin - offset; @@ -88,6 +91,24 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; } } +/* static */ bool BigramListReadWriteUtils::setHasNextFlag( + BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) { + const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos); + int readingPos = entryPos; + if (usesAdditionalBuffer) { + readingPos -= buffer->getOriginalBufferSize(); + } + BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition( + buffer->getBuffer(usesAdditionalBuffer), &readingPos); + if (hasNext) { + bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT; + } else { + bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT); + } + int writingPos = entryPos; + return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos); +} + /* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry( BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability, const bool hasNext, int *const writingPos) { @@ -101,10 +122,12 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; /* static */ bool BigramListReadWriteUtils::writeBigramEntry( BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags, const int targetPtNodePos, int *const writingPos) { - if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) { + const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos); + const BigramFlags flagsToWrite = (offset < 0) ? + (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE); + if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) { return false; } - const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0; const uint32_t absOffest = abs(offset); const int bigramTargetFieldSize = attributeAddressSize(flags); return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize, @@ -113,14 +136,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; // Returns true if the bigram entry is valid and put entry flags into out*. /* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos, - const int targetPos, const int probability, const bool hasNext, + const int targetPtNodePos, const int probability, const bool hasNext, BigramFlags *const outBigramFlags) { BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY; if (hasNext) { flags |= FLAG_ATTRIBUTE_HAS_NEXT; } - const int targetFieldPos = entryPos + 1; - const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0; + const int offset = getBigramTargetOffset(targetPtNodePos, entryPos); if (offset < 0) { flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE; } @@ -143,4 +165,18 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; return true; } +/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos, + const int entryPos) { + if (targetPtNodePos == NOT_A_DICT_POS) { + return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID; + } else { + const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */); + if (offset == 0) { + return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET; + } else { + return offset; + } + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index 234a0ea58..eabe4e099 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -59,9 +59,8 @@ public: */ } - static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) { - return flags | FLAG_ATTRIBUTE_HAS_NEXT; - } + static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer, + const bool hasNext, const int entryPos); static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags, const int probability) { @@ -96,6 +95,8 @@ private: static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos); + + static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos); }; } // namespace latinime #endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index ba43bdb10..29307b56a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -19,6 +19,7 @@ #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -69,9 +70,11 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b *outBigramsCount = 0; BigramListReadWriteUtils::BigramFlags bigramFlags; int bigramEntryCount = 0; + int lastWrittenEntryPos = NOT_A_DICT_POS; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -88,6 +91,11 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b originalBigramPos += mBuffer->getOriginalBufferSize(); } const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + if (bigramPos == NOT_A_DICT_POS) { + // Target PtNode has been invalidated. + continue; + } + lastWrittenEntryPos = *toPos; if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) { @@ -95,6 +103,13 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b } (*outBigramsCount)++; } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + // Makes the last entry the terminal of the list. Updates the flags. + if (lastWrittenEntryPos != NOT_A_DICT_POS) { + if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */, + lastWrittenEntryPos)) { + return false; + } + } if (usesAdditionalBuffer) { *fromPos += mBuffer->getOriginalBufferSize(); } @@ -114,7 +129,8 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -150,6 +166,54 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( return true; } +// Updates bigram target PtNode positions in the list after the placing step in GC. +bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos, + const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const + ptNodePositionRelocationMap) { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); + if (usesAdditionalBuffer) { + *bigramListPos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + int bigramEntryPos = *bigramListPos; + if (usesAdditionalBuffer) { + bigramEntryPos += mBuffer->getOriginalBufferSize(); + } + int bigramTargetPtNodePos; + // The buffer address can be changed after calling buffer writing methods. + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos, + bigramListPos); + if (bigramTargetPtNodePos == NOT_A_DICT_POS) { + continue; + } + if (usesAdditionalBuffer) { + bigramTargetPtNodePos += mBuffer->getOriginalBufferSize(); + } + + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = + ptNodePositionRelocationMap->find(bigramTargetPtNodePos); + if (it != ptNodePositionRelocationMap->end()) { + bigramTargetPtNodePos = it->second; + } else { + bigramTargetPtNodePos = NOT_A_DICT_POS; + } + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + bigramTargetPtNodePos, &bigramEntryPos)) { + return false; + } + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + return true; +} + bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, int *const bigramListPos) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); @@ -160,7 +224,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -188,10 +253,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg } // The current last entry is found. // First, update the flags of the last entry. - const BigramListReadWriteUtils::BigramFlags updatedFlags = - BigramListReadWriteUtils::setHasNextFlag(bigramFlags); - if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos, - &entryPos)) { + if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) { return false; } if (usesAdditionalBuffer) { @@ -222,7 +284,8 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bi int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -262,7 +325,7 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); bigramLinkCount++; if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { - AKLOGE("Bigram link is invalid. start position: %d", bigramPos); + AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); ASSERT(false); return NOT_A_DICT_POS; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 16b080ae5..8ea318a41 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" namespace latinime { @@ -51,6 +52,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos); + bool updateAllBigramTargetPtNodePositions(int *const bigramListPos, + const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const + ptNodePositionRelocationMap); + bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, int *const bigramListPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index ffa02e3f6..c60e45819 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -19,7 +19,7 @@ namespace latinime { bool DynamicPatriciaTrieGcEventListeners - ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless @@ -47,11 +47,13 @@ bool DynamicPatriciaTrieGcEventListeners } // Writes dummy PtNode array size when the head of PtNode array is read. -bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onDescend(const int ptNodeArrayPos) { mValidPtNodeCount = 0; int writingPos = mBufferToWrite->getTailPosition(); - mPositionMap->insert(hash_map_compat<int, int>::value_type(ptNodeArrayPos, writingPos)); + mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type( + ptNodeArrayPos, writingPos)); // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes. // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count. mPtNodeArraySizeFieldPos = writingPos; @@ -60,7 +62,7 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod } // Write PtNode array terminal and actual PtNode array size. -bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onReadingPtNodeArrayTail() { int writingPos = mBufferToWrite->getTailPosition(); // Write PtNode array terminal. @@ -77,22 +79,71 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod } // Write valid PtNode to buffer and memorize mapping from the old position to the new position. -bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints) { if (node->isDeleted()) { // Current PtNode is not written in new buffer because it has been deleted. - mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), - NOT_A_DICT_POS)); + mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( + node->getHeadPos(), NOT_A_DICT_POS)); return true; } int writingPos = mBufferToWrite->getTailPosition(); - mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), writingPos)); + mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( + node->getHeadPos(), writingPos)); mValidPtNodeCount++; // Writes current PtNode. return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, - node->getParentPos(), nodeCodePoints, node->getCodePointCount(), + node->getParentPos(), nodeCodePoints, node->getCodePointCount(), node->getProbability(), &writingPos); } +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + // Updates parent position. + int parentPos = node->getParentPos(); + if (parentPos != NOT_A_DICT_POS) { + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = + mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); + if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) { + parentPos = it->second; + } + } + int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + // Write updated parent offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite, + parentPos, node->getHeadPos(), &writingPos)) { + return false; + } + + // Updates children position. + int childrenPos = node->getChildrenPos(); + if (childrenPos != NOT_A_DICT_POS) { + DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it = + mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); + if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) { + childrenPos = it->second; + } + } + writingPos = node->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite, + childrenPos, &writingPos)) { + return false; + } + + // Updates bigram target PtNode positions in the bigram list. + int bigramsPos = node->getBigramsPos(); + if (bigramsPos != NOT_A_DICT_POS) { + if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, + &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) { + return false; + } + } + + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index 728559330..4256f22fb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -34,16 +34,16 @@ class DynamicPatriciaTrieGcEventListeners { // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC. // TODO: Concatenate non-terminal PtNodes. - class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted + class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( + TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const buffer) : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), mChildrenValue(0) {} - ~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {}; + ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; bool onAscend() { if (valueStack.empty()) { @@ -66,7 +66,7 @@ class DynamicPatriciaTrieGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS( - ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted); + TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; @@ -76,10 +76,10 @@ class DynamicPatriciaTrieGcEventListeners { // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram // entries. - class ListenerForUpdatingBigramProbability + class TraversePolicyToUpdateBigramProbability : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy) + TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy) : mBigramPolicy(bigramPolicy) {} bool onAscend() { return true; } @@ -102,20 +102,21 @@ class DynamicPatriciaTrieGcEventListeners { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability); + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability); DynamicBigramListPolicy *const mBigramPolicy; }; - class ListenerForPlacingAndWritingValidPtNodesToBuffer + class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - ListenerForPlacingAndWritingValidPtNodesToBuffer( + TraversePolicyToPlaceAndWriteValidPtNodesToBuffer( DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const bufferToWrite, - hash_map_compat<int, int> *const positionMap) + DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + dictPositionRelocationMap) : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite), - mPositionMap(positionMap), mValidPtNodeCount(0), + mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0), mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {}; bool onAscend() { return true; } @@ -127,20 +128,49 @@ class DynamicPatriciaTrieGcEventListeners { bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints); - hash_map_compat<int, int> *getPositionMap() const { - return mPositionMap; - } - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer); + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBufferToWrite; - hash_map_compat<int, int> *const mPositionMap; + DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + mDictPositionRelocationMap; int mValidPtNodeCount; int mPtNodeArraySizeFieldPos; }; + class TraversePolicyToUpdateAllPositionFields + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { + public: + TraversePolicyToUpdateAllPositionFields( + DynamicPatriciaTrieWritingHelper *const writingHelper, + DynamicBigramListPolicy *const bigramPolicy, + BufferWithExtendableBuffer *const bufferToWrite, + const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + dictPositionRelocationMap) + : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), + mBufferToWrite(bufferToWrite), + mDictPositionRelocationMap(dictPositionRelocationMap) {}; + + bool onAscend() { return true; } + + bool onDescend(const int ptNodeArrayPos) { return true; } + + bool onReadingPtNodeArrayTail() { return true; } + + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); + + DynamicPatriciaTrieWritingHelper *const mWritingHelper; + DynamicBigramListPolicy *const mBigramPolicy; + BufferWithExtendableBuffer *const mBufferToWrite; + const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + mDictPositionRelocationMap; + }; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index d52de7ada..456352c17 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -40,9 +40,10 @@ void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNo pos -= mBuffer->getOriginalBufferSize(); } mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const int parentPos = - DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); - mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS; + const int parentPosOffset = + DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, + &pos); + mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos); if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 70a09c245..42397c19e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -258,7 +258,9 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } - // TODO: Implement. + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy); + writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); } bool DynamicPatriciaTriePolicy::needsToRunGC() const { @@ -266,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const { AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); return false; } - // TODO: Implement. - return false; + // TODO: Implement more properly. + return mBufferWithExtendableBuffer.isNearSizeLimit(); } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index 754b679d5..f4a2ef389 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -59,6 +59,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa if (!listener->onReadingPtNodeArrayTail()) { return false; } + if (mReadingStateStack.size() <= 0) { + break; + } if (!listener->onAscend()) { return false; } @@ -101,6 +104,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor if (!listener->onAscend()) { return false; } + if (mReadingStateStack.size() <= 0) { + break; + } popReadingStateFromStack(); alreadyVisitedChildren = true; alreadyVisitedAllPtNodesInArray = true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index b033eee05..c6d8ddcf7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -263,7 +263,7 @@ class DynamicPatriciaTrieReadingHelper { AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { - AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE); + AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index 8428c0b15..d68446db6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -28,24 +28,42 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80; +// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0. +// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum +// value of offsets, which is 0x7FFFFF is used to represent 0 offset. +const int DptReadingUtils::DICT_OFFSET_INVALID = 0; +const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF; + /* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer, const int pos) { int linkAddressPos = pos; return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); } -/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { +/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); } +/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) { + if (parentOffset == DICT_OFFSET_INVALID) { + return NOT_A_DICT_POS; + } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) { + return ptNodePos; + } else { + return parentOffset + ptNodePos; + } +} + /* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const int base = *pos; const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); - if (offset == 0) { - // 0 offset means that the node does not have children. + if (offset == DICT_OFFSET_INVALID) { + // The PtNode does not have children. return NOT_A_DICT_POS; + } else if (offset == DICT_OFFSET_ZERO_OFFSET) { + return base; } else { return base + offset; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index db5f9b1bd..67c3cc57e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -27,13 +27,19 @@ class DynamicPatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; + static const int DICT_OFFSET_INVALID; + static const int DICT_OFFSET_ZERO_OFFSET; + static int getForwardLinkPosition(const uint8_t *const buffer, const int pos); static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) { return forwardLinkAddress != 0; } - static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos); + static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer, + int *const pos); + + static int getParentPtNodePos(const int parentOffset, const int ptNodePos); static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 3fc9c4ce0..578645cd5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -16,9 +16,6 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include <cstdio> -#include <cstring> - #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" @@ -28,12 +25,14 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "utils/hash_map_compat.h" namespace latinime { const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; -const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = - ".tmp"; +// TODO: Make MAX_DICTIONARY_SIZE 8MB. +const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, @@ -144,7 +143,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { return; } - flushAllToFile(fileName, &headerBuffer, mBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); } void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, @@ -153,11 +152,12 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) { return; } - BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, + MAX_DICTIONARY_SIZE); if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { return; } - flushAllToFile(fileName, &headerBuffer, &newDictBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); } bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( @@ -202,9 +202,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( return false; } // Update moved position, which is stored in the parent offset field. - const int movedPosOffset = movedPos - originalNode->getHeadPos(); - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( - mBuffer, movedPosOffset, &writingPos)) { + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { return false; } // Update bigram linked node position, which is stored in the children position field. @@ -219,11 +218,10 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); while (!readingHelper.isEnd()) { - const int childPtNodeWrittenPos = nodeReader->getHeadPos(); - const int parentOffset = movedPos - childPtNodeWrittenPos; - int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */; - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( - mBuffer, parentOffset, &parentOffsetFieldPos)) { + int parentOffsetFieldPos = nodeReader->getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) { // Parent offset cannot be written because of a bug or a broken dictionary; thus, // we give up to update dictionary. return false; @@ -249,9 +247,8 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer( return false; } // Calculate a parent offset and write the offset. - const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS; - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite, - parentOffset, writingPos)) { + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite, + parentPos, nodePos, writingPos)) { return false; } // Write code points @@ -462,89 +459,53 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return true; } -// TODO: Create a struct which contains header, body and etc... and use here as an argument. -void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const { - const int tmpFileNameBufSize = strlen(fileName) - + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; - // Name of a temporary file used for writing that is a connected string of original name and - // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. - char tmpFileName[tmpFileNameBufSize]; - snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName, - TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); - FILE *const file = fopen(tmpFileName, "wb"); - if (!file) { - AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName); - ASSERT(false); - return; - } - // Write the dictionary header. - if (!writeBufferToFilePointer(file, dictHeader)) { - remove(tmpFileName); - AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); - ASSERT(false); - return; - } - // Write the dictionary body. - if (!writeBufferToFilePointer(file, dictBody)) { - remove(tmpFileName); - AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); - ASSERT(false); - return; - } - fclose(file); - rename(tmpFileName, fileName); -} - -// This closes file pointer when an error is caused and returns whether the writing was succeeded -// or not. -bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const { - const int originalBufSize = buffer->getOriginalBufferSize(); - if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), - originalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); - if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), - additionalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - return true; -} - bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners - ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted - listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( this, mBuffer); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) { + &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability - listenerForupdatingBigramProbability(mBigramPolicy); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + traversePolicyToUpdateBigramProbability(mBigramPolicy); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &listenerForupdatingBigramProbability)) { + &traversePolicyToUpdateBigramProbability)) { return false; } // Mapping from positions in mBuffer to positions in bufferToWrite. - hash_map_compat<int, int> positionMap; + DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer - listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, + &dictPositionRelocationMap); + if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { + return false; + } - // TODO: Implement. - return false; + // Create policy instance for the GCed dictionary. + DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); + DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy); + // Create reading helper for the GCed dictionary. + DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, + &newDictShortcutPolicy); + newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, + &dictPositionRelocationMap); + if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToUpdateAllPositionFields)) { + return false; + } + return true; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index e82b80ae5..fe1b2437a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -17,10 +17,10 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H -#include <cstdio> #include <stdint.h> #include "defines.h" +#include "utils/hash_map_compat.h" namespace latinime { @@ -33,6 +33,20 @@ class HeaderPolicy; class DynamicPatriciaTrieWritingHelper { public: + typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap; + typedef hash_map_compat<int, int> PtNodePositionRelocationMap; + struct DictPositionRelocationMap { + public: + DictPositionRelocationMap() + : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {} + + PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap; + PtNodePositionRelocationMap mPtNodePositionRelocationMap; + + private: + DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); + }; + DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy *const bigramPolicy, DynamicShortcutListPolicy *const shortcutPolicy) @@ -70,7 +84,7 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; - static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + static const size_t MAX_DICTIONARY_SIZE; BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; @@ -108,13 +122,6 @@ class DynamicPatriciaTrieWritingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - void flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const; - - bool writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const; - bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite); }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index b261e594d..30ff10cd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -36,21 +36,33 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000; const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1; const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; +/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( + BufferWithExtendableBuffer *const buffer, const int rootPos) { + int writingPos = rootPos; + if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) { + return false; + } + return writeForwardLinkPositionAndAdvancePosition(buffer, NOT_A_DICT_POS /* forwardLinkPos */, + &writingPos); +} + /* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos) { - const int offset = (forwardLinkPos != NOT_A_DICT_POS) ? - forwardLinkPos - (*forwardLinkFieldPos) : 0; - return writeDictOffset(buffer, offset, forwardLinkFieldPos); + return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos); } /* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const size_t arraySize, int *const arraySizeFieldPos) { - if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) { + // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to + // simplify updating process. + // TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays. + /*if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) { return buffer->writeUintAndAdvancePosition(arraySize, SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE, arraySizeFieldPos); - } else if (arraySize <= MAX_PTNODE_ARRAY_SIZE) { + } else */ + if (arraySize <= MAX_PTNODE_ARRAY_SIZE) { uint32_t data = arraySize | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG; return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE, arraySizeFieldPos); @@ -69,11 +81,10 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; } // Note that parentOffset is offset from node's head position. -/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( - BufferWithExtendableBuffer *const buffer, const int parentOffset, +/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos, int *const parentPosFieldPos) { - int offset = (parentOffset != NOT_A_DICT_POS) ? parentOffset : 0; - return writeDictOffset(buffer, offset, parentPosFieldPos); + return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos); } /* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition( @@ -106,13 +117,19 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; /* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int childrenPosition, int *const childrenPositionFieldPos) { - int offset = (childrenPosition != NOT_A_DICT_POS) ? - childrenPosition - (*childrenPositionFieldPos) : 0; - return writeDictOffset(buffer, offset, childrenPositionFieldPos); + return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos), + childrenPositionFieldPos); } /* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset( - BufferWithExtendableBuffer *const buffer, const int offset, int *const offsetFieldPos) { + BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos, + int *const offsetFieldPos) { + int offset = targetPos - basePos; + if (targetPos == NOT_A_DICT_POS) { + offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID; + } else if (offset == 0) { + offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET; + } if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) { AKLOGI("offset cannot be written because the offset is too large or too small: %d", offset); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index 183ede444..af76bc6b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -28,6 +28,10 @@ class BufferWithExtendableBuffer; class DynamicPatriciaTrieWritingUtils { public: + static const int NODE_FLAG_FIELD_SIZE; + + static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos); + static bool writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos); @@ -39,8 +43,8 @@ class DynamicPatriciaTrieWritingUtils { const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos); - static bool writeParentOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer, - const int parentPosition, int *const parentPosFieldPos); + static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer, + const int parentPosition, const int basePos, int *const parentPosFieldPos); static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer, const int *const codePoints, const int codePointCount, int *const codePointFieldPos); @@ -63,11 +67,10 @@ class DynamicPatriciaTrieWritingUtils { static const int MAX_DICT_OFFSET_VALUE; static const int MIN_DICT_OFFSET_VALUE; static const int DICT_OFFSET_NEGATIVE_FLAG; - static const int NODE_FLAG_FIELD_SIZE; static const int PROBABILITY_FIELD_SIZE; - static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset, - int *const offsetFieldPos); + static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos, + const int basePos, int *const offsetFieldPos); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 47ace23a1..7bbeacaa0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -22,10 +22,12 @@ namespace latinime { + +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; -const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; +const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; // Used for logging. Question mark is used to indicate that the key is not found. @@ -37,7 +39,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out return; } std::vector<int> keyCodePointVector; - insertCharactersIntoVector(key, &keyCodePointVector); + HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. @@ -53,47 +55,29 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - int attributeValue = 0; - if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { - if (attributeValue <= 0) { - return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); - } - return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue); - } else { - return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + std::vector<int> keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector); + const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); + if (demotionRate <= 0) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } + return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); } bool HeaderPolicy::readUsesForgettingCurveFlag() const { - int attributeValue = 0; - if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) { - return attributeValue != 0; - } else { - return false; - } + std::vector<int> keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + false /* defaultValue */); } -// Returns S_INT_MIN when the key is not found or the value is invalid. +// Returns current time when the key is not found or the value is invalid. int HeaderPolicy::readLastUpdatedTime() const { - int attributeValue = 0; - if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) { - return attributeValue; - } else { - return S_INT_MIN; - } -} - -// Returns whether the key is found or not and stores the found value into outValue. -bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { std::vector<int> keyVector; - insertCharactersIntoVector(key, &keyVector); - HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); - if (it == mAttributeMap.end()) { - // The key was not found. - return false; - } - *outValue = parseIntAttributeValue(&(it->second)); - return true; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + time(0) /* defaultValue */); } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, @@ -117,13 +101,8 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT // Set current time as a last updated time. HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); std::vector<int> updatedTimekey; - insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); - const time_t currentTime = time(NULL); - std::vector<int> updatedTimeValue; - char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; - snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); - insertCharactersIntoVector(charBuf, &updatedTimeValue); - attributeMapTowrite[updatedTimekey] = updatedTimeValue; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0)); if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { return false; @@ -149,30 +128,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT return attributeMap; } -/* static */ int HeaderPolicy::parseIntAttributeValue( - const std::vector<int> *const attributeValue) { - int value = 0; - bool isNegative = false; - for (size_t i = 0; i < attributeValue->size(); ++i) { - if (i == 0 && attributeValue->at(i) == '-') { - isNegative = true; - } else { - if (!isdigit(attributeValue->at(i))) { - // If not a number, return S_INT_MIN - return S_INT_MIN; - } - value *= 10; - value += attributeValue->at(i) - '0'; - } - } - return isNegative ? -value : value; -} - -/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, - std::vector<int> *const vector) { - for (int i = 0; characters[i]; ++i) { - vector->push_back(characters[i]); - } -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 6b396f3f2..e97c08ca4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,7 +17,6 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include <cctype> #include <stdint.h> #include "defines.h" @@ -29,16 +28,26 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictBuf(dictBuf), - mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + // Reads information from existing dictionary buffer. + HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), - mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), + mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mUsesForgettingCurve(readUsesForgettingCurveFlag()), mLastUpdatedTime(readLastUpdatedTime()) {} + // Constructs header information using an attribute map. + HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) + : mDictFormatVersion(dictFormatVersion), + mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + attributeMap)), mSize(0), mAttributeMap(*attributeMap), + mMultiWordCostMultiplier(readUsesForgettingCurveFlag()), + mUsesForgettingCurve(readUsesForgettingCurveFlag()), + mLastUpdatedTime(readLastUpdatedTime()) {} + ~HeaderPolicy() {} AK_FORCE_INLINE int getSize() const { @@ -81,10 +90,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const USES_FORGETTING_CURVE_KEY; static const char *const LAST_UPDATED_TIME_KEY; - static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - const uint8_t *const mDictBuf; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; @@ -99,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { int readLastUpdatedTime() const; - bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); - - static int parseIntAttributeValue(const std::vector<int> *const attributeValue); - - static void insertCharactersIntoVector( - const char *const characters, std::vector<int> *const vector); }; } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 80fe88671..3b1c78085 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include <cctype> +#include <cstdio> #include <vector> #include "defines.h" @@ -43,6 +45,13 @@ const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. +const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; +const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = + "REQUIRES_GERMAN_UMLAUT_PROCESSING"; +const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY = + "REQUIRES_FRENCH_LIGATURE_PROCESSING"; + /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() @@ -56,6 +65,28 @@ const HeaderReadWriteUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + AttributeMap::key_type key; + insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key); + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + DictionaryFlags dictflags = NO_FLAGS; + dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; + dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; + dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0; + return dictflags; +} + /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const int headerSize = getHeaderSize(dictBuf); @@ -128,4 +159,57 @@ const HeaderReadWriteUtils::DictionaryFlags return true; } +/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value) { + setIntAttribute(headerAttributes, key, value ? 1 : 0); +} + +/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value) { + AttributeMap::mapped_type valueVector; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value); + insertCharactersIntoVector(charBuf, &valueVector); + (*headerAttributes)[*key] = valueVector; +} + +/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const bool defaultValue) { + const int intDefaultValue = defaultValue ? 1 : 0; + const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); + return intValue != 0; +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const int defaultValue) { + AttributeMap::const_iterator it = headerAttributes->find(*key); + if (it != headerAttributes->end()) { + int value = 0; + bool isNegative = false; + for (size_t i = 0; i < it->second.size(); ++i) { + if (i == 0 && it->second.at(i) == '-') { + isNegative = true; + } else { + if (!isdigit(it->second.at(i))) { + // If not a number. + return defaultValue; + } + value *= 10; + value += it->second.at(i) - '0'; + } + } + return isNegative ? -value : value; + } + return defaultValue; +} + +/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters, + std::vector<int> *const vector) { + for (int i = 0; characters[i]; ++i) { + vector->push_back(characters[i]); + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 6cce73375..caa5097f6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -54,6 +54,9 @@ class HeaderReadWriteUtils { + HEADER_SIZE_FIELD_SIZE; } + static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); @@ -69,6 +72,24 @@ class HeaderReadWriteUtils { static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, int *const writingPos); + /** + * Methods for header attributes. + */ + static void setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value); + + static void setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value); + + static bool readBoolAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool defaultValue); + + static int readIntAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int defaultValue); + + static void insertCharactersIntoVector(const char *const characters, + AttributeMap::key_type *const key); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); @@ -87,7 +108,10 @@ class HeaderReadWriteUtils { static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; - static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; + static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; }; } #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 5269795a4..8a84bd261 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -31,9 +31,21 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); + if (nextPos < 0 || nextPos >= mDictBufferSize) { + AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", + nextPos, mDictBufferSize); + ASSERT(false); + return; + } const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { + if (nextPos < 0 || nextPos >= mDictBufferSize) { + AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", + nextPos, mDictBufferSize, i, childCount); + ASSERT(false); + return; + } nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); } } @@ -404,6 +416,11 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod if (PatriciaTrieReadingUtils::hasBigrams(flags)) { getBigramsStructurePolicy()->skipAllBigrams(&pos); } + if (mergedNodeCodePointCount <= 0) { + AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); + ASSERT(false); + return pos; + } childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 19155f938..f1de914cb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -36,6 +36,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { PatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), + mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~PatriciaTriePolicy() { @@ -118,6 +119,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; + const int mDictBufferSize; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 1316b425f..7df55815f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -71,8 +71,17 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, pos); } else { - if (maxLength > 0) { - outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + const int codePoint = getCodePointAndAdvancePosition(buffer, pos); + if (codePoint == NOT_A_CODE_POINT) { + // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is + // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR + // when the PtNode has a single code point. + length = 0; + AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", + *pos - 1, codePoint, buffer[*pos - 1]); + ASSERT(false); + } else if (maxLength > 0) { + outBuffer[0] = codePoint; length = 1; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 0fed275e9..f692882f2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -18,9 +18,10 @@ namespace latinime { -const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024; const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; -const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024; +const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90; +// TODO: Needs to allocate larger memory corresponding to the current vector size. +const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { @@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co return true; } +bool BufferWithExtendableBuffer::extendBuffer() { + const size_t sizeAfterExtending = + mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; + if (sizeAfterExtending > mMaxAdditionalBufferSize) { + return false; + } + mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP); + return true; +} + bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) { if (isInAdditionalBuffer(pos)) { const int tailPosition = getTailPosition(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index c6a484131..17d2e39c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -32,9 +32,11 @@ namespace latinime { // raw pointer but provides several methods that handle boundary checking for writing data. class BufferWithExtendableBuffer { public: - BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize) + BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, + const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE) : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), - mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {} + mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), + mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} AK_FORCE_INLINE int getTailPosition() const { return mOriginalBufferSize + mUsedAdditionalBufferSize; @@ -61,6 +63,11 @@ class BufferWithExtendableBuffer { return mOriginalBufferSize; } + AK_FORCE_INLINE bool isNearSizeLimit() const { + return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize + * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100); + } + /** * For writing. * @@ -75,28 +82,22 @@ class BufferWithExtendableBuffer { private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); - static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE; static const size_t MAX_ADDITIONAL_BUFFER_SIZE; + static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; uint8_t *const mOriginalBuffer; const int mOriginalBufferSize; std::vector<uint8_t> mAdditionalBuffer; int mUsedAdditionalBufferSize; + const size_t mMaxAdditionalBufferSize; // Return if the buffer is successfully extended or not. - AK_FORCE_INLINE bool extendBuffer() { - if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP - > MAX_ADDITIONAL_BUFFER_SIZE) { - return false; - } - mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP); - return true; - } + bool extendBuffer(); // Returns if it is possible to write size-bytes from pos. When pos is at the tail position of // the additional buffer, try extending the buffer. - AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size); + bool checkAndPrepareWriting(const int pos, const int size); }; } #endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp new file mode 100644 index 000000000..2e4ec2e1d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" + +#include <cstdio> +#include <cstring> + +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" + +namespace latinime { + +const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp"; + +/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath, + const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + switch (dictVersion) { + case 3: + return createEmptyV3DictFile(filePath, attributeMap); + default: + // Only version 3 dictionary is supported for now. + return false; + } +} + +/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); + headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */); + BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { + return false; + } + return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); +} + +/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { + const int tmpFileNameBufSize = strlen(filePath) + + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; + // Name of a temporary file used for writing that is a connected string of original name and + // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. + char tmpFileName[tmpFileNameBufSize]; + snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + FILE *const file = fopen(tmpFileName, "wb"); + if (!file) { + AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + ASSERT(false); + return false; + } + // Write the dictionary header. + if (!writeBufferToFile(file, dictHeader)) { + remove(tmpFileName); + AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); + ASSERT(false); + return false; + } + // Write the dictionary body. + if (!writeBufferToFile(file, dictBody)) { + remove(tmpFileName); + AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); + ASSERT(false); + return false; + } + fclose(file); + rename(tmpFileName, filePath); + return true; +} + +// This closes file pointer when an error is caused and returns whether the writing was succeeded +// or not. +/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer) { + const int originalBufSize = buffer->getOriginalBufferSize(); + if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), + originalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); + if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), + additionalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h new file mode 100644 index 000000000..bd4ac66fd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H +#define LATINIME_DICT_FILE_WRITING_UTILS_H + +#include <cstdio> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class DictFileWritingUtils { + public: + static bool createEmptyDictFile(const char *const filePath, const int dictVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, + BufferWithExtendableBuffer *const dictBody); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils); + + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + + static bool createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer); +}; +} // namespace latinime +#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index b6aa85896..9f0a331e3 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -74,7 +74,8 @@ class TypingWeighting : public Weighting { // Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on // the keyboard (like accented letters) const float normalizedSquaredLength = traverseSession->getProximityInfoState(0) - ->getPointToKeyLength(pointIndex, dicNode->getNodeCodePoint()); + ->getPointToKeyLength(pointIndex, + CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float normalizedDistance = TouchPositionCorrectionUtils::getSweetSpotFactor( traverseSession->isTouchPositionCorrectionEnabled(), normalizedSquaredLength); const float weightedDistance = ScoringParams::DISTANCE_WEIGHT_LENGTH * normalizedDistance; @@ -113,10 +114,10 @@ class TypingWeighting : public Weighting { const int16_t parentPointIndex = parentDicNode->getInputIndex(0); const int prevCodePoint = parentDicNode->getNodeCodePoint(); const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex + 1, prevCodePoint); + parentPointIndex + 1, CharUtils::toBaseLowerCase(prevCodePoint)); const int codePoint = dicNode->getNodeCodePoint(); const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex, codePoint); + parentPointIndex, CharUtils::toBaseLowerCase(codePoint)); const float distance = distance1 + distance2; const float weightedLengthDistance = distance * ScoringParams::DISTANCE_WEIGHT_LENGTH; @@ -133,7 +134,7 @@ class TypingWeighting : public Weighting { const bool existsAdjacentProximityChars = traverseSession->getProximityInfoState(0) ->existsAdjacentProximityChars(insertedPointIndex); const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - insertedPointIndex + 1, dicNode->getNodeCodePoint()); + insertedPointIndex + 1, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH; const bool singleChar = dicNode->getNodeCodePointCount() == 1; float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f); |