about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp | 72
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp | 98
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h | 33
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h | 2
6 files changed, 112 insertions, 97 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
index 7a52fd180..146cab6c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
@@ -71,8 +71,14 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
bigramProperty);
// Write an entry.
- const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
+ int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
+ &writingPos)) {
+ AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
+ return false;
+ }
+ if (!mBigramDictContent->writeTerminator(writingPos)) {
+ AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
return false;
}
if (outAddedNewEntry) {
@@ -84,32 +90,37 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
int tailEntryPos = NOT_A_DICT_POS;
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
&tailEntryPos);
- if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
- // Case 4, 5.
- // Add new entry to the bigram list.
- if (tailEntryPos == NOT_A_DICT_POS) {
- // Case 4. Create new bigram list.
- if (!mBigramDictContent->createNewBigramList(terminalId)) {
- return false;
- }
- const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- // Copy existing bigram list.
- if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
- return false;
- }
- }
+ if (entryPosToUpdate == NOT_A_DICT_POS) {
+ // Case 4, 5. Add new entry to the bigram list.
+ const int contentTailPos = mBigramDictContent->getContentTailPos();
+ // If the tail entry is at the tail of content buffer, the new entry can be written without
+ // link (Case 5).
+ const bool canAppendEntry =
+ contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
+ const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
+ int writingPos = newEntryPos;
// Write new entry at the tail position of the bigram content.
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, bigramProperty);
- if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
+ &writingPos)) {
+ AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
return false;
}
- // Update has next flag of the tail entry.
- if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
+ if (!mBigramDictContent->writeTerminator(writingPos)) {
+ AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
return false;
}
+ if (!canAppendEntry) {
+ // Update link of the current tail entry.
+ if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
+ AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
+ tailEntryPos, newEntryPos);
+ return false;
+ }
+ }
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
@@ -228,14 +239,18 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
if (outTailEntryPos) {
*outTailEntryPos = NOT_A_DICT_POS;
}
- bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
- while (hasNext) {
- const int entryPos = readingPos;
+ while (true) {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
+ const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
+ if (!bigramEntry.hasNext()) {
+ if (outTailEntryPos) {
+ *outTailEntryPos = entryPos;
+ }
+ break;
+ }
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
// Entry with same target is found.
return entryPos;
@@ -243,11 +258,6 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
- if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
- if (outTailEntryPos) {
- *outTailEntryPos = entryPos;
- }
- }
}
return invalidEntryPos;
}
@@ -269,10 +279,4 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
}
}
-bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
- const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
- const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
- return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
index 1613941c4..55ba613a5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
@@ -63,8 +63,6 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const BigramProperty *const bigramProperty) const;
- bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
-
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
const HeaderPolicy *const mHeaderPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index e1ceaee49..d7e1952b5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -20,6 +20,8 @@
namespace latinime {
+const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
+
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
@@ -34,7 +36,7 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
}
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
- const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+ const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0;
int probability = NOT_A_PROBABILITY;
int timestamp = NOT_A_TIMESTAMP;
int level = 0;
@@ -55,81 +57,90 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
const int targetTerminalId =
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
+ if (isLink) {
+ const int linkedEntryPos = targetTerminalId;
+ if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) {
+ // Bigram list terminator is found.
+ return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ Ver4DictConstants::NOT_A_TERMINAL_ID);
+ }
+ *bigramEntryPos = linkedEntryPos;
+ return getBigramEntryAndAdvancePosition(bigramEntryPos);
+ }
+ // hasNext is always true because we should continue to read the next entry until the terminator
+ // is found.
if (mHasHistoricalInfo) {
const HistoricalInfo historicalInfo(timestamp, level, count);
- return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
+ return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
} else {
- return BigramEntry(hasNext, probability, targetTerminalId);
+ return BigramEntry(true /* hasNext */, probability, targetTerminalId);
}
}
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+ return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
+ bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
+ bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
+ bigramEntryToWrite->getHistoricalInfo()->getLevel(),
+ bigramEntryToWrite->getHistoricalInfo()->getCount(),
+ entryWritingPos);
+}
+
+bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
+ const bool isLink, const int probability, const int targetTerminalId,
+ const int timestamp, const int level, const int count, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
- const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
+ const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
if (mHasHistoricalInfo) {
- const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+ if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
- historicalInfo->getTimeStamp());
+ timestamp);
return false;
}
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
+ if (!bigramListBuffer->writeUintAndAdvancePosition(level,
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
- historicalInfo->getLevel());
+ level);
return false;
}
- if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
+ if (!bigramListBuffer->writeUintAndAdvancePosition(count,
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
- historicalInfo->getCount());
+ count);
return false;
}
} else {
- if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
+ if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
- bigramEntryToWrite->getProbability());
+ probability);
return false;
}
}
- const int targetTerminalIdToWrite =
- (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
- Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
- bigramEntryToWrite->getTargetTerminalId();
+ const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
+ Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
- *entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
+ *entryWritingPos, targetTerminalId);
return false;
}
return true;
}
-bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
- int *const outTailEntryPos) {
- int readingPos = bigramListPos;
- int writingPos = toPos;
- bool hasNext = true;
- while (hasNext) {
- const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- if (!hasNext) {
- *outTailEntryPos = writingPos;
- }
- if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
- AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
- return false;
- }
- }
- return true;
+bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
+ const int targetTerminalId = linkedEntryPos;
+ int pos = writingPos;
+ return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
+ NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
+ 0 /* count */, &pos);
}
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
@@ -171,16 +182,15 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
bool BigramDictContent::runGCBigramList(const int bigramListPos,
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- int *const outEntrycount) {
+ int *const outEntryCount) {
bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
- int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) {
const BigramEntry originalBigramEntry =
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = originalBigramEntry.hasNext();
- if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ if (!originalBigramEntry.isValid()) {
continue;
}
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
@@ -189,21 +199,17 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
// Target word has been removed.
continue;
}
- lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false;
}
- *outEntrycount += 1;
+ *outEntryCount += 1;
}
- if (lastEntryPos != NOT_A_DICT_POS) {
- // Update has next flag in the last written entry.
- const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
- false /* hasNext */);
- if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
- AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
+ if (*outEntryCount > 0) {
+ if (!writeTerminator(writingPos)) {
+ AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
return false;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 52447a336..033f18e9e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -42,6 +42,10 @@ class BigramDictContent : public SparseTableDictContent {
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
+ int getContentTailPos() const {
+ return getContentBuffer()->getTailPosition();
+ }
+
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
int readingPos = bigramEntryPos;
return getBigramEntryAndAdvancePosition(&readingPos);
@@ -71,13 +75,18 @@ class BigramDictContent : public SparseTableDictContent {
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
int *const entryWritingPos);
+ bool writeTerminator(const int writingPos) {
+ // Terminator is a link to the invalid position.
+ return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
+ }
+
+ bool writeLink(const int linkedPos, const int writingPos);
+
bool createNewBigramList(const int terminalId) {
const int bigramListPos = getContentBuffer()->getTailPosition();
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
}
- bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
-
bool flushToFile(const char *const dictPath) const {
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
@@ -88,17 +97,6 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const originalBigramDictContent,
int *const outBigramEntryCount);
- bool isContentTailPos(const int pos) const {
- return pos == getContentBuffer()->getTailPosition();
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
-
- int createAndGetBigramFlags(const bool hasNext) const {
- return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
- }
-
int getBigramEntrySize() const {
if (mHasHistoricalInfo) {
return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
@@ -113,6 +111,15 @@ class BigramDictContent : public SparseTableDictContent {
}
}
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
+
+ static const int INVALID_LINKED_ENTRY_POS;
+
+ bool writeBigramEntryAttributesAndAdvancePosition(
+ const bool isLink, const int probability, const int targetTerminalId,
+ const int timestamp, const int level, const int count, int *const entryWritingPos);
+
bool runGCBigramList(const int bigramListPos,
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index deed010cd..345cabbf9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -60,7 +60,7 @@ const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
-const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
+const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80;
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index d6d22c5c1..b4effca9c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -57,8 +57,8 @@ class Ver4DictConstants {
static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
+ static const int BIGRAM_IS_LINK_MASK;
static const int BIGRAM_PROBABILITY_MASK;
- static const int BIGRAM_HAS_NEXT_MASK;
// Used when bigram list has time stamp.
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;