aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp54
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h37
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h28
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp56
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp72
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp17
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h2
-rw-r--r--tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java42
19 files changed, 329 insertions, 46 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 94d7f1061..8bbbacd54 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -94,7 +94,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
- // Bigram list does't exist.
+ // Bigram list doesn't exist.
return false;
}
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
@@ -118,12 +118,62 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
}
+bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return true;
+ }
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ int probability = NOT_A_PROBABILITY;
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
+ &targetTerminalId, &readingPos);
+ if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ continue;
+ }
+ const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ targetTerminalId);
+ if (targetPtNodePos == NOT_A_DICT_POS) {
+ // Invalidate bigram entry.
+ int writingPos = entryPos;
+ return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
+ Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
+ }
+ }
+ return true;
+}
+
+int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
+ const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ // Bigram list doesn't exist.
+ return 0;
+ }
+ int bigramCount = 0;
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
+ &targetTerminalId, &readingPos);
+ if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ bigramCount++;
+ }
+ }
+ return bigramCount;
+}
+
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos) const {
bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
- while(hasNext) {
+ while (hasNext) {
const int entryPos = readingPos;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index b3fe13d7d..5b7d5b527 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -44,6 +44,11 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
bool removeEntry(const int terminalId, const int targetTerminalId);
+ bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
+ int *const outBigramCount);
+
+ int getBigramEntryConut(const int terminalId);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 6a4cfee3c..d105a3e49 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -148,6 +148,14 @@ class PtNodeParams {
return PatriciaTrieReadingUtils::isNotAWord(mFlags);
}
+ AK_FORCE_INLINE bool hasBigrams() const {
+ return PatriciaTrieReadingUtils::hasBigrams(mFlags);
+ }
+
+ AK_FORCE_INLINE bool hasShortcutTargets() const {
+ return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
+ }
+
// Parent node position
AK_FORCE_INLINE int getParentPos() const {
return mParentPos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
index 173017dc7..223933f50 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -67,16 +67,13 @@ bool DynamicPatriciaTrieGcEventListeners
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
- if (!ptNodeParams->isDeleted()) {
- int pos = ptNodeParams->getBigramsPos();
- if (pos != NOT_A_DICT_POS) {
- int bigramEntryCount = 0;
- if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
- &bigramEntryCount)) {
- return false;
- }
- mValidBigramEntryCount += bigramEntryCount;
+ if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) {
+ int bigramEntryCount = 0;
+ if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
+ &bigramEntryCount)) {
+ return false;
}
+ mValidBigramEntryCount += bigramEntryCount;
}
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp
index cbc0d2304..39e1ecaaa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp
@@ -258,8 +258,7 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
probability != NOT_A_PROBABILITY /* isTerminal */,
- originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
- originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */,
+ originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
probability);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 999460086..c22b9b455 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -59,7 +59,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos)
bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
- while(hasNext) {
+ while (hasNext) {
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
index 518376426..73a472a21 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
@@ -41,25 +41,29 @@ class ProbabilityDictContent : public SingleDictContent {
}
bool setProbability(const int terminalId, const int probability) {
- if (terminalId < 0 || terminalId > getSize()) {
+ if (terminalId < 0) {
return false;
}
- if (terminalId == getSize()) {
+ if (terminalId >= getSize()) {
// Write new entry.
- int flagWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE);
- const int dummyFlags = 0;
- // Write dummy flags.
- if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags,
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &flagWritingPos)) {
- return false;
+ int writingPos = getBuffer()->getTailPosition();
+ while (writingPos <= getEntryPos(terminalId)) {
+ const int dummyFlags = 0;
+ if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags,
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
+ return false;
+ }
+ const int dummyProbability = 0;
+ if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyProbability,
+ Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
+ return false;
+ }
}
}
- int probabilityWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE)
- + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
- return getWritableBuffer()->writeUintAndAdvancePosition(probability,
- Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos);
+ const int probabilityWritingPos = getEntryPos(terminalId)
+ + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
+ return getWritableBuffer()->writeUint(probability,
+ Ver4DictConstants::PROBABILITY_SIZE, probabilityWritingPos);
}
bool flushToFile(const char *const dictDirPath) const {
@@ -69,6 +73,11 @@ class ProbabilityDictContent : public SingleDictContent {
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
+ int getEntryPos(const int terminalId) const {
+ return terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ + Ver4DictConstants::PROBABILITY_SIZE);
+ }
+
int getSize() const {
return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
index c65420614..1c5caa1a7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
@@ -18,6 +18,22 @@
namespace latinime {
+bool SparseTableDictContent::copyContent(
+ const SparseTableDictContent *const sparseTableDictContent) {
+ if (!mExpandableLookupTableBuffer.copy(
+ &sparseTableDictContent->mExpandableLookupTableBuffer)) {
+ return false;
+ }
+ if (!mExpandableAddressTableBuffer.copy(
+ &sparseTableDictContent->mExpandableAddressTableBuffer)) {
+ return false;
+ }
+ if (!mExpandableContentBuffer.copy(&sparseTableDictContent->mExpandableContentBuffer)) {
+ return false;
+ }
+ return true;
+}
+
bool SparseTableDictContent::flush(const char *const dictDirPath,
const char *const lookupTableFileName, const char *const addressTableFileName,
const char *const contentFileName) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index 9a4f1e1c0..151568747 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -75,6 +75,8 @@ class SparseTableDictContent : public DictContent {
|| mExpandableContentBuffer.isNearSizeLimit();
}
+ bool copyContent(const SparseTableDictContent *const sparseTableDictContent);
+
protected:
SparseTable *getUpdatableAddressLookupTable() {
return &mAddressLookupTable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
index 873b2406c..2940a985e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
@@ -44,23 +44,27 @@ class TerminalPositionLookupTable : public SingleDictContent {
if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_DICT_POS;
}
- const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
- readingPos) - mHeaderRegionSize;
+ getEntryPos(terminalId)) - mHeaderRegionSize;
}
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) {
- if (terminalId < 0 || terminalId > mSize) {
+ if (terminalId < 0) {
return NOT_A_DICT_POS;
}
- if (terminalId == mSize) {
- // Use new terminal id.
- mSize += 1;
+ if (terminalId >= mSize) {
+ int writingPos = getBuffer()->getTailPosition();
+ while(writingPos <= getEntryPos(terminalId)) {
+ // Write new entry.
+ getWritableBuffer()->writeUintAndAdvancePosition(
+ Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos);
+ }
+ mSize = getBuffer()->getTailPosition()
+ / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
}
- int writingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
- return getWritableBuffer()->writeUintAndAdvancePosition(
- terminalPtNodePos + mHeaderRegionSize,
- Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos);
+ return getWritableBuffer()->writeUint(terminalPtNodePos + mHeaderRegionSize,
+ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
}
int getNextTerminalId() const {
@@ -94,6 +98,10 @@ class TerminalPositionLookupTable : public SingleDictContent {
private:
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
+ int getEntryPos(const int terminalId) const {
+ return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ }
+
int mSize;
const int mHeaderRegionSize;
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index d3de3c86d..e67bd2edb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -93,6 +93,10 @@ class Ver4DictBuffers {
return &mBigramDictContent;
}
+ AK_FORCE_INLINE ShortcutDictContent *getUpdatableShortcutDictContent() {
+ return &mShortcutDictContent;
+ }
+
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
return &mShortcutDictContent;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index b4f9553ee..a27d0bc98 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -41,6 +41,7 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 64c388791..96d5f6de0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -41,6 +41,7 @@ class Ver4DictConstants {
static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
+ static const int NOT_A_TERMINAL_ADDRESS;
static const int TERMINAL_ID_FIELD_SIZE;
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 5b1ddddf5..071792544 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -45,8 +45,17 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
true /* isDeleted */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
- return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
- &writingPos);
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
+ &writingPos)) {
+ return false;
+ }
+ if (toBeUpdatedPtNodeParams->getTerminalId() != NOT_A_DICT_POS) {
+ // The PtNode is a terminal. Delete entry from the terminal position lookup table.
+ return mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
+ toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
+ } else {
+ return true;
+ }
}
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
@@ -171,7 +180,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), isTerminal,
- false /* hasShortcutTargets */, false /* hasBigrams */,
+ ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
@@ -198,16 +207,49 @@ bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
- // TODO: Implement.
- return false;
+ return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
+ sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
}
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
const PtNodeParams *const toBeUpdatedPtNodeParams,
const DictPositionRelocationMap *const dictPositionRelocationMap,
int *const outBigramEntryCount) {
- // TODO: Implement.
- return false;
+ int parentPos = toBeUpdatedPtNodeParams->getParentPos();
+ if (parentPos != NOT_A_DICT_POS) {
+ PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
+ dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
+ if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
+ parentPos = it->second;
+ }
+ }
+ int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
+ + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
+ // Write updated parent offset.
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
+ parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
+ return false;
+ }
+
+ // Updates children position.
+ int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
+ if (childrenPos != NOT_A_DICT_POS) {
+ PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
+ dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
+ if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
+ childrenPos = it->second;
+ }
+ }
+ if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
+ return false;
+ }
+
+ // Counts bigram entries.
+ if (outBigramEntryCount) {
+ *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
+ toBeUpdatedPtNodeParams->getTerminalId());
+ }
+ return true;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index bcad6fe05..eeab3121a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -223,7 +223,15 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
}
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
- // TODO: Implement.
+ if (!mBuffers.get()->isUpdatable()) {
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return;
+ }
+ const bool needsToDecay = mHeaderPolicy.isDecayingDict()
+ && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
+ mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy, needsToDecay);
+ mNeedsToDecayForTesting = false;
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 853cf38ad..11adf2c1d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -20,6 +20,7 @@
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -91,7 +92,78 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ headerPolicy, &ptNodeWriter, mBuffers->getWritableTrieBuffer(),
+ needsToDecay);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ if (needsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more unigrams.
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(&ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ if (needsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
+ > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more bigrams.
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
+ buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instances for the GCed dictionary.
+ Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
+ buffersToWrite->getProbabilityDictContent());
+ Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable());
+ Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
+
+ if(!buffersToWrite->getUpdatableBigramDictContent()->copyContent(
+ mBuffers->getBigramDictContent())) {
+ return false;
+ }
+ if(!buffersToWrite->getUpdatableShortcutDictContent()->copyContent(
+ mBuffers->getShortcutDictContent())) {
+ return false;
+ }
+
+ DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
+ &newPtNodeReader);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+
+ // TODO: GC for dict contents.
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
return true;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 26eafcd44..4b537da8a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -131,4 +131,21 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return true;
}
+bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) {
+ int copyingPos = 0;
+ const int tailPos = sourceBuffer->getTailPosition();
+ const int maxDataChunkSize = sizeof(uint32_t);
+ while (copyingPos < tailPos) {
+ const int remainingSize = tailPos - copyingPos;
+ const int copyingSize = (remainingSize >= maxDataChunkSize) ?
+ maxDataChunkSize : remainingSize;
+ const uint32_t data = sourceBuffer->readUint(copyingSize, copyingPos);
+ if (!writeUint(data, copyingSize, copyingPos)) {
+ return false;
+ }
+ copyingPos += copyingSize;
+ }
+ return true;
+}
+
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index ee6107ad7..76be16518 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -100,6 +100,8 @@ class BufferWithExtendableBuffer {
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
const bool writesTerminator, int *const pos);
+ bool copy(const BufferWithExtendableBuffer *const sourceBuffer);
+
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
index b51a86b1c..55177266e 100644
--- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java
@@ -297,4 +297,46 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close();
}
+ public void testFlushWithGCDictionary() {
+ final String dictVersion = Long.toString(System.currentTimeMillis());
+ File trieFile = null;
+ try {
+ trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
+ 0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ final int unigramProbability = 100;
+ final int bigramProbability = 10;
+ binaryDictionary.addUnigramWord("aaa", unigramProbability);
+ binaryDictionary.addUnigramWord("abb", unigramProbability);
+ binaryDictionary.addUnigramWord("bcc", unigramProbability);
+ binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
+ binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
+ binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
+ binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
+ binaryDictionary.flushWithGC();
+ binaryDictionary.close();
+
+ binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
+ 0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ final int probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
+ assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
+ assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
+ assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
+ assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
+ assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
+ binaryDictionary.flushWithGC();
+ binaryDictionary.close();
+ }
}