aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp48
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp79
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp69
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h64
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp65
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp33
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h25
17 files changed, 383 insertions, 109 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index f8ed2b9aa..1926b9831 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -78,8 +79,10 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
break;
}
- if (offset == 0) {
+ if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) {
return NOT_A_DICT_POS;
+ } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) {
+ return origin;
}
if (isOffsetNegative(flags)) {
return origin - offset;
@@ -88,6 +91,24 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
}
}
+/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
+ BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
+ const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
+ int readingPos = entryPos;
+ if (usesAdditionalBuffer) {
+ readingPos -= buffer->getOriginalBufferSize();
+ }
+ BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer->getBuffer(usesAdditionalBuffer), &readingPos);
+ if (hasNext) {
+ bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT;
+ } else {
+ bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT);
+ }
+ int writingPos = entryPos;
+ return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos);
+}
+
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
const bool hasNext, int *const writingPos) {
@@ -101,10 +122,12 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
const int targetPtNodePos, int *const writingPos) {
- if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
+ const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos);
+ const BigramFlags flagsToWrite = (offset < 0) ?
+ (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
+ if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
return false;
}
- const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0;
const uint32_t absOffest = abs(offset);
const int bigramTargetFieldSize = attributeAddressSize(flags);
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
@@ -113,14 +136,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
// Returns true if the bigram entry is valid and put entry flags into out*.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
- const int targetPos, const int probability, const bool hasNext,
+ const int targetPtNodePos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags) {
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
- const int targetFieldPos = entryPos + 1;
- const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0;
+ const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
@@ -143,4 +165,18 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
return true;
}
+/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
+ const int entryPos) {
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else {
+ const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
+ if (offset == 0) {
+ return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ } else {
+ return offset;
+ }
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
index 234a0ea58..eabe4e099 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -59,9 +59,8 @@ public:
*/
}
- static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) {
- return flags | FLAG_ATTRIBUTE_HAS_NEXT;
- }
+ static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
+ const bool hasNext, const int entryPos);
static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
const int probability) {
@@ -96,6 +95,8 @@ private:
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
+
+ static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index ba43bdb10..bc2f5ee58 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -19,6 +19,7 @@
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@@ -69,9 +70,11 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
*outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
+ int lastWrittenEntryPos = NOT_A_DICT_POS;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
@@ -88,6 +91,11 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ if (bigramPos == NOT_A_DICT_POS) {
+ // Target PtNode has been invalidated.
+ continue;
+ }
+ lastWrittenEntryPos = *toPos;
if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
@@ -95,6 +103,13 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
}
(*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ // Makes the last entry the terminal of the list. Updates the flags.
+ if (lastWrittenEntryPos != NOT_A_DICT_POS) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
+ lastWrittenEntryPos)) {
+ return false;
+ }
+ }
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();
}
@@ -114,7 +129,8 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
@@ -150,6 +166,54 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
return true;
}
+// Updates bigram target PtNode positions in the list after the placing step in GC.
+bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+ const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+ ptNodePositionRelocationMap) {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
+ if (usesAdditionalBuffer) {
+ *bigramListPos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::BigramFlags bigramFlags;
+ int bigramEntryCount = 0;
+ do {
+ if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ ASSERT(false);
+ return false;
+ }
+ int bigramEntryPos = *bigramListPos;
+ if (usesAdditionalBuffer) {
+ bigramEntryPos += mBuffer->getOriginalBufferSize();
+ }
+ int bigramTargetPtNodePos;
+ // The buffer address can be changed after calling buffer writing methods.
+ BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+ mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
+ bigramListPos);
+ if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
+ continue;
+ }
+ if (usesAdditionalBuffer) {
+ bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
+ }
+
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+ ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
+ if (it != ptNodePositionRelocationMap->end()) {
+ bigramTargetPtNodePos = it->second;
+ } else {
+ bigramTargetPtNodePos = NOT_A_DICT_POS;
+ }
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ bigramTargetPtNodePos, &bigramEntryPos)) {
+ return false;
+ }
+ } while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ return true;
+}
+
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
const int probability, int *const bigramListPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
@@ -160,7 +224,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
@@ -188,10 +253,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
}
// The current last entry is found.
// First, update the flags of the last entry.
- const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setHasNextFlag(bigramFlags);
- if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos,
- &entryPos)) {
+ if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
return false;
}
if (usesAdditionalBuffer) {
@@ -222,7 +284,8 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bi
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
- AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
+ AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
+ bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index 16b080ae5..8ea318a41 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
namespace latinime {
@@ -51,6 +52,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
+ bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
+ const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
+ ptNodePositionRelocationMap);
+
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
int *const bigramListPos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index ffa02e3f6..c60e45819 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -19,7 +19,7 @@
namespace latinime {
bool DynamicPatriciaTrieGcEventListeners
- ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
@@ -47,11 +47,13 @@ bool DynamicPatriciaTrieGcEventListeners
}
// Writes dummy PtNode array size when the head of PtNode array is read.
-bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onDescend(const int ptNodeArrayPos) {
mValidPtNodeCount = 0;
int writingPos = mBufferToWrite->getTailPosition();
- mPositionMap->insert(hash_map_compat<int, int>::value_type(ptNodeArrayPos, writingPos));
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
+ ptNodeArrayPos, writingPos));
// Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
// This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
mPtNodeArraySizeFieldPos = writingPos;
@@ -60,7 +62,7 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
}
// Write PtNode array terminal and actual PtNode array size.
-bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onReadingPtNodeArrayTail() {
int writingPos = mBufferToWrite->getTailPosition();
// Write PtNode array terminal.
@@ -77,22 +79,71 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
}
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
-bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (node->isDeleted()) {
// Current PtNode is not written in new buffer because it has been deleted.
- mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(),
- NOT_A_DICT_POS));
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+ node->getHeadPos(), NOT_A_DICT_POS));
return true;
}
int writingPos = mBufferToWrite->getTailPosition();
- mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), writingPos));
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
+ node->getHeadPos(), writingPos));
mValidPtNodeCount++;
// Writes current PtNode.
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
- node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
+ node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
node->getProbability(), &writingPos);
}
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ // Updates parent position.
+ int parentPos = node->getParentPos();
+ if (parentPos != NOT_A_DICT_POS) {
+ DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
+ mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
+ if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+ parentPos = it->second;
+ }
+ }
+ int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ // Write updated parent offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
+ parentPos, node->getHeadPos(), &writingPos)) {
+ return false;
+ }
+
+ // Updates children position.
+ int childrenPos = node->getChildrenPos();
+ if (childrenPos != NOT_A_DICT_POS) {
+ DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
+ mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
+ if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+ childrenPos = it->second;
+ }
+ }
+ writingPos = node->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
+ childrenPos, &writingPos)) {
+ return false;
+ }
+
+ // Updates bigram target PtNode positions in the bigram list.
+ int bigramsPos = node->getBigramsPos();
+ if (bigramsPos != NOT_A_DICT_POS) {
+ if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
+ &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 728559330..4256f22fb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -34,16 +34,16 @@ class DynamicPatriciaTrieGcEventListeners {
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
// TODO: Concatenate non-terminal PtNodes.
- class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
+ class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer)
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
mChildrenValue(0) {}
- ~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {};
+ ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
bool onAscend() {
if (valueStack.empty()) {
@@ -66,7 +66,7 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(
- ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted);
+ TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
@@ -76,10 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
- class ListenerForUpdatingBigramProbability
+ class TraversePolicyToUpdateBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
+ TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy) {}
bool onAscend() { return true; }
@@ -102,20 +102,21 @@ class DynamicPatriciaTrieGcEventListeners {
}
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
DynamicBigramListPolicy *const mBigramPolicy;
};
- class ListenerForPlacingAndWritingValidPtNodesToBuffer
+ class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- ListenerForPlacingAndWritingValidPtNodesToBuffer(
+ TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const bufferToWrite,
- hash_map_compat<int, int> *const positionMap)
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
: mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
- mPositionMap(positionMap), mValidPtNodeCount(0),
+ mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
bool onAscend() { return true; }
@@ -127,20 +128,49 @@ class DynamicPatriciaTrieGcEventListeners {
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
- hash_map_compat<int, int> *getPositionMap() const {
- return mPositionMap;
- }
-
private:
- DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBufferToWrite;
- hash_map_compat<int, int> *const mPositionMap;
+ DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
int mValidPtNodeCount;
int mPtNodeArraySizeFieldPos;
};
+ class TraversePolicyToUpdateAllPositionFields
+ : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
+ public:
+ TraversePolicyToUpdateAllPositionFields(
+ DynamicPatriciaTrieWritingHelper *const writingHelper,
+ DynamicBigramListPolicy *const bigramPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite,
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ dictPositionRelocationMap)
+ : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
+ mBufferToWrite(bufferToWrite),
+ mDictPositionRelocationMap(dictPositionRelocationMap) {};
+
+ bool onAscend() { return true; }
+
+ bool onDescend(const int ptNodeArrayPos) { return true; }
+
+ bool onReadingPtNodeArrayTail() { return true; }
+
+ bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
+
+ DynamicPatriciaTrieWritingHelper *const mWritingHelper;
+ DynamicBigramListPolicy *const mBigramPolicy;
+ BufferWithExtendableBuffer *const mBufferToWrite;
+ const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
+ mDictPositionRelocationMap;
+ };
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index d52de7ada..456352c17 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -40,9 +40,10 @@ void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNo
pos -= mBuffer->getOriginalBufferSize();
}
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
- const int parentPos =
- DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
- mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS;
+ const int parentPosOffset =
+ DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
+ &pos);
+ mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 70a09c245..42397c19e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -258,7 +258,9 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
- // TODO: Implement.
+ DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
+ &mBigramListPolicy, &mShortcutListPolicy);
+ writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
}
bool DynamicPatriciaTriePolicy::needsToRunGC() const {
@@ -266,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
- // TODO: Implement.
- return false;
+ // TODO: Implement more properly.
+ return mBufferWithExtendableBuffer.isNearSizeLimit();
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index 754b679d5..f4a2ef389 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -59,6 +59,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
if (!listener->onReadingPtNodeArrayTail()) {
return false;
}
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
if (!listener->onAscend()) {
return false;
}
@@ -101,6 +104,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
if (!listener->onAscend()) {
return false;
}
+ if (mReadingStateStack.size() <= 0) {
+ break;
+ }
popReadingStateFromStack();
alreadyVisitedChildren = true;
alreadyVisitedAllPtNodesInArray = true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 8428c0b15..d68446db6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -28,24 +28,42 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0.
+// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum
+// value of offsets, which is 0x7FFFFF is used to represent 0 offset.
+const int DptReadingUtils::DICT_OFFSET_INVALID = 0;
+const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF;
+
/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
const int pos) {
int linkAddressPos = pos;
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
}
-/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
+/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
+/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) {
+ if (parentOffset == DICT_OFFSET_INVALID) {
+ return NOT_A_DICT_POS;
+ } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) {
+ return ptNodePos;
+ } else {
+ return parentOffset + ptNodePos;
+ }
+}
+
/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const int base = *pos;
const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
- if (offset == 0) {
- // 0 offset means that the node does not have children.
+ if (offset == DICT_OFFSET_INVALID) {
+ // The PtNode does not have children.
return NOT_A_DICT_POS;
+ } else if (offset == DICT_OFFSET_ZERO_OFFSET) {
+ return base;
} else {
return base + offset;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index db5f9b1bd..67c3cc57e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -27,13 +27,19 @@ class DynamicPatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
+ static const int DICT_OFFSET_INVALID;
+ static const int DICT_OFFSET_ZERO_OFFSET;
+
static int getForwardLinkPosition(const uint8_t *const buffer, const int pos);
static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
return forwardLinkAddress != 0;
}
- static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos);
+ static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos);
+
+ static int getParentPtNodePos(const int parentOffset, const int ptNodePos);
static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 3fc9c4ce0..a51ae5e1d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -28,12 +28,15 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "utils/hash_map_compat.h"
namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
".tmp";
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
@@ -153,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
return;
}
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
+ MAX_DICTIONARY_SIZE);
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
return;
}
@@ -202,9 +206,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
return false;
}
// Update moved position, which is stored in the parent offset field.
- const int movedPosOffset = movedPos - originalNode->getHeadPos();
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- mBuffer, movedPosOffset, &writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
@@ -219,11 +222,10 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
while (!readingHelper.isEnd()) {
- const int childPtNodeWrittenPos = nodeReader->getHeadPos();
- const int parentOffset = movedPos - childPtNodeWrittenPos;
- int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */;
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- mBuffer, parentOffset, &parentOffsetFieldPos)) {
+ int parentOffsetFieldPos = nodeReader->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
@@ -249,9 +251,8 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
return false;
}
// Calculate a parent offset and write the offset.
- const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS;
- if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite,
- parentOffset, writingPos)) {
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
+ parentPos, nodePos, writingPos)) {
return false;
}
// Write code points
@@ -521,30 +522,48 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
- ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
- listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
this, mBuffer);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
- listenerForupdatingBigramProbability(mBigramPolicy);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(mBigramPolicy);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
- &listenerForupdatingBigramProbability)) {
+ &traversePolicyToUpdateBigramProbability)) {
return false;
}
// Mapping from positions in mBuffer to positions in bufferToWrite.
- hash_map_compat<int, int> positionMap;
+ DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
- DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
- listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
- // TODO: Implement.
- return false;
+ // Create policy instance for the GCed dictionary.
+ DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
+ DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
+ // Create reading helper for the GCed dictionary.
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
+ &newDictShortcutPolicy);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
+ &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ return true;
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index e82b80ae5..028fa6075 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -21,6 +21,7 @@
#include <stdint.h>
#include "defines.h"
+#include "utils/hash_map_compat.h"
namespace latinime {
@@ -33,6 +34,20 @@ class HeaderPolicy;
class DynamicPatriciaTrieWritingHelper {
public:
+ typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
+ typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
+ struct DictPositionRelocationMap {
+ public:
+ DictPositionRelocationMap()
+ : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
+
+ PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
+ PtNodePositionRelocationMap mPtNodePositionRelocationMap;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
+ };
+
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy)
@@ -71,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper {
static const int CHILDREN_POSITION_FIELD_SIZE;
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+ static const size_t MAX_DICTIONARY_SIZE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
index b261e594d..5a3983776 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp
@@ -39,18 +39,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
/* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos) {
- const int offset = (forwardLinkPos != NOT_A_DICT_POS) ?
- forwardLinkPos - (*forwardLinkFieldPos) : 0;
- return writeDictOffset(buffer, offset, forwardLinkFieldPos);
+ return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const size_t arraySize,
int *const arraySizeFieldPos) {
- if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
+ // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to
+ // simplify updating process.
+ // TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays.
+ /*if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) {
return buffer->writeUintAndAdvancePosition(arraySize, SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE,
arraySizeFieldPos);
- } else if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
+ } else */
+ if (arraySize <= MAX_PTNODE_ARRAY_SIZE) {
uint32_t data = arraySize | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE,
arraySizeFieldPos);
@@ -69,11 +71,10 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
}
// Note that parentOffset is offset from node's head position.
-/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
- BufferWithExtendableBuffer *const buffer, const int parentOffset,
+/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
+ BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos,
int *const parentPosFieldPos) {
- int offset = (parentOffset != NOT_A_DICT_POS) ? parentOffset : 0;
- return writeDictOffset(buffer, offset, parentPosFieldPos);
+ return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(
@@ -106,13 +107,19 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
int *const childrenPositionFieldPos) {
- int offset = (childrenPosition != NOT_A_DICT_POS) ?
- childrenPosition - (*childrenPositionFieldPos) : 0;
- return writeDictOffset(buffer, offset, childrenPositionFieldPos);
+ return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos),
+ childrenPositionFieldPos);
}
/* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset(
- BufferWithExtendableBuffer *const buffer, const int offset, int *const offsetFieldPos) {
+ BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos,
+ int *const offsetFieldPos) {
+ int offset = targetPos - basePos;
+ if (targetPos == NOT_A_DICT_POS) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
+ } else if (offset == 0) {
+ offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
+ }
if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) {
AKLOGI("offset cannot be written because the offset is too large or too small: %d",
offset);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
index 183ede444..a37e9fb3d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h
@@ -28,6 +28,8 @@ class BufferWithExtendableBuffer;
class DynamicPatriciaTrieWritingUtils {
public:
+ static const int NODE_FLAG_FIELD_SIZE;
+
static bool writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos);
@@ -39,8 +41,8 @@ class DynamicPatriciaTrieWritingUtils {
const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags,
int *const nodeFlagsFieldPos);
- static bool writeParentOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
- const int parentPosition, int *const parentPosFieldPos);
+ static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
+ const int parentPosition, const int basePos, int *const parentPosFieldPos);
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
@@ -63,11 +65,10 @@ class DynamicPatriciaTrieWritingUtils {
static const int MAX_DICT_OFFSET_VALUE;
static const int MIN_DICT_OFFSET_VALUE;
static const int DICT_OFFSET_NEGATIVE_FLAG;
- static const int NODE_FLAG_FIELD_SIZE;
static const int PROBABILITY_FIELD_SIZE;
- static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset,
- int *const offsetFieldPos);
+ static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
+ const int basePos, int *const offsetFieldPos);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 0fed275e9..f692882f2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,9 +18,10 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
-const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
+const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
+// TODO: Needs to allocate larger memory corresponding to the current vector size.
+const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
@@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return true;
}
+bool BufferWithExtendableBuffer::extendBuffer() {
+ const size_t sizeAfterExtending =
+ mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+ if (sizeAfterExtending > mMaxAdditionalBufferSize) {
+ return false;
+ }
+ mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
+ return true;
+}
+
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
if (isInAdditionalBuffer(pos)) {
const int tailPosition = getTailPosition();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index c6a484131..17d2e39c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,9 +32,11 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
- BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
+ BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
+ const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
@@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize;
}
+ AK_FORCE_INLINE bool isNearSizeLimit() const {
+ return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize
+ * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100);
+ }
+
/**
* For writing.
*
@@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
+ static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
uint8_t *const mOriginalBuffer;
const int mOriginalBufferSize;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
+ const size_t mMaxAdditionalBufferSize;
// Return if the buffer is successfully extended or not.
- AK_FORCE_INLINE bool extendBuffer() {
- if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
- > MAX_ADDITIONAL_BUFFER_SIZE) {
- return false;
- }
- mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
- return true;
- }
+ bool extendBuffer();
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
// the additional buffer, try extending the buffer.
- AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size);
+ bool checkAndPrepareWriting(const int pos, const int size);
};
}
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */