aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2013-12-02 10:53:18 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2013-12-02 10:53:18 +0000
commit027de367067c03e4f07c0bafbb79d68d464d6df2 (patch)
tree2233c4488c3621d1aaa2473a28919489bd5947cd
parent545e4191b30c072315dee61b6189fe2eb03c493f (diff)
parent69e6165d2eea707ab6ba2d5b2bfd1a959b412984 (diff)
downloadlatinime-027de367067c03e4f07c0bafbb79d68d464d6df2.tar.gz
latinime-027de367067c03e4f07c0bafbb79d68d464d6df2.tar.xz
latinime-027de367067c03e4f07c0bafbb79d68d464d6df2.zip
Merge "Extend bigram probability field to support historical info."
-rw-r--r--native/jni/src/defines.h15
-rw-r--r--native/jni/src/suggest/core/dictionary/bloom_filter.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp126
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp120
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h33
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h104
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h5
-rw-r--r--native/jni/src/utils/exclusive_ownership_pointer.h3
9 files changed, 291 insertions, 121 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index fbcd612b7..564811560 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -341,12 +341,21 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
+#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
+ TypeName()
+
+#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
+ TypeName(const TypeName&)
+
+#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
void operator=(const TypeName&)
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ DISALLOW_COPY_CONSTRUCTOR(TypeName); \
+ DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
+
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
- TypeName(); \
+ DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// Used as a return value for character comparison
diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h
index 5205456a8..5f9700486 100644
--- a/native/jni/src/suggest/core/dictionary/bloom_filter.h
+++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h
@@ -50,6 +50,8 @@ class BloomFilter {
}
private:
+ DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
+
// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
index 7160f6f48..48ddb2ff4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
@@ -26,12 +26,18 @@ namespace latinime {
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const {
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext,
- &targetTerminalId, bigramEntryPos);
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
if (outBigramPos) {
// Lookup target PtNode position.
- *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
+ *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
+ bigramEntry.getTargetTerminalId());
+ }
+ if (outProbability) {
+ *outProbability = bigramEntry.getProbability();
+ }
+ if (outHasNext) {
+ *outHasNext = bigramEntry.hasNext();
}
}
@@ -47,12 +53,13 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
- const int probabilityToWrite = getUpdatedProbability(
- NOT_A_PROBABILITY /* originalProbability */, newProbability);
+ const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry,
+ newProbability, timestamp);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */,
- newTargetTerminalId, writingPos)) {
+ if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
return false;
}
if (outAddedNewEntry) {
@@ -64,18 +71,19 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
if (entryPosToUpdate != NOT_A_DICT_POS) {
// Overwrite existing entry.
- bool hasNext = false;
- int probability = NOT_A_PROBABILITY;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
- entryPosToUpdate);
- const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
- if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
+ const BigramEntry originalBigramEntry =
+ mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (!originalBigramEntry.isValid()) {
// Reuse invalid entry.
- *outAddedNewEntry = true;
+ if (outAddedNewEntry) {
+ *outAddedNewEntry = true;
+ }
}
- return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext,
- newTargetTerminalId, entryPosToUpdate);
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
+ &updatedBigramEntry, newProbability, timestamp);
+ return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
// Add new entry to the bigram list.
@@ -85,10 +93,10 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
}
// Write new entry at a head position of the bigram list.
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
- const int probabilityToWrite = getUpdatedProbability(
- NOT_A_PROBABILITY /* originalProbability */, newProbability);
- if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite,
- true /* hasNext */, newTargetTerminalId, &writingPos)) {
+ const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
+ const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
+ &newBigramEntry, newProbability, timestamp);
+ if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
return false;
}
if (outAddedNewEntry) {
@@ -109,18 +117,14 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram entry doesn't exist.
return false;
}
- bool hasNext = false;
- int probability = NOT_A_PROBABILITY;
- int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
- entryPosToUpdate);
- if (targetTerminalId != originalTargetTerminalId) {
+ const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
+ if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
// Bigram entry doesn't exist.
return false;
}
- // Remove bigram entry by overwriting target terminal Id.
- return mBigramDictContent->writeBigramEntry(probability, hasNext,
- Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate);
+ // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
}
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
@@ -134,34 +138,35 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
- int probability = NOT_A_PROBABILITY;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
- &targetTerminalId, &readingPos);
- if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!bigramEntry.isValid()) {
continue;
}
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
- targetTerminalId);
+ bigramEntry.getTargetTerminalId());
if (targetPtNodePos == NOT_A_DICT_POS) {
// Invalidate bigram entry.
- if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
- Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
} else if (mNeedsToDecayWhenUpdating) {
- probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- probability, mHeaderPolicy);
+ // TODO: Quit decaying probability during GC.
+ const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
+ bigramEntry.getProbability(), mHeaderPolicy);
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
- if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId,
- entryPos)) {
+ const BigramEntry updatedBigramEntry =
+ bigramEntry.updateProbabilityAndGetEntry(probability);
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
*outBigramCount += 1;
} else {
// Remove entry.
- if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
- Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
}
@@ -182,10 +187,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
- &targetTerminalId, &readingPos);
- if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.isValid()) {
bigramCount++;
}
}
@@ -199,13 +204,13 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
- &targetTerminalId, &readingPos);
- if (targetTerminalId == targetTerminalIdToFind) {
+ const BigramEntry bigramEntry =
+ mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
// Entry with same target is found.
return entryPos;
- } else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ } else if (!bigramEntry.isValid()) {
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
@@ -213,13 +218,16 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
return invalidEntryPos;
}
-int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability,
- const int newProbability) const {
+const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry(
+ const BigramEntry *const originalBigramEntry, const int newProbability,
+ const int timestamp) const {
if (mNeedsToDecayWhenUpdating) {
- return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
- newProbability);
+ // TODO: Update historical information.
+ const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
+ originalBigramEntry->getProbability(), newProbability);
+ return originalBigramEntry->updateProbabilityAndGetEntry(probability);
} else {
- return newProbability;
+ return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
index c0959118e..e718645b4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
namespace latinime {
@@ -58,7 +59,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
- int getUpdatedProbability(const int originalProbability, const int newProbability) const;
+ const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry,
+ const int newProbability, const int timestamp) const;
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 431f342ae..2a783543a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -20,53 +20,98 @@
namespace latinime {
-void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability,
- bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const {
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+ int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
- if (outProbability) {
- *outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
+ const int hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
+ int probability = NOT_A_PROBABILITY;
+ int timestamp = Ver4DictConstants::NOT_A_TIME_STAMP;
+ int level = 0;
+ int count = 0;
+ if (mHasHistoricalInfo) {
+ probability = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+ timestamp = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+ level = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+ count = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+ } else {
+ probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
}
- if (outHasNext) {
- *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
- }
- const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
+ const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
- if (outTargetTerminalId) {
- *outTargetTerminalId =
- (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
- Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId;
+ const int targetTerminalId =
+ (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
+ Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
+ if (mHasHistoricalInfo) {
+ return BigramEntry(hasNext, probability, timestamp, level, count, targetTerminalId);
+ } else {
+ return BigramEntry(hasNext, probability, targetTerminalId);
}
}
-bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
- const int targetTerminalId, int *const entryWritingPos) {
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+ const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
- const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
+ const int bigramFlags = createAndGetBigramFlags(
+ mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
+ bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
+ if (mHasHistoricalInfo) {
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
+ Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+ bigramEntryToWrite->getProbability());
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getTimeStamp(),
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+ bigramEntryToWrite->getTimeStamp());
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getLevel(),
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
+ bigramEntryToWrite->getLevel());
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getCount(),
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
+ bigramEntryToWrite->getCount());
+ return false;
+ }
+ }
const int targetTerminalIdToWrite =
- (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
- Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
- return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
- Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
+ (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
+ Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
+ bigramEntryToWrite->getTargetTerminalId();
+ if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+ *entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
+ return false;
+ }
+ return true;
}
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
- bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
+ bool hasNext = true;
while (hasNext) {
- int probability = NOT_A_PROBABILITY;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,
- &readingPos);
- if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
- &writingPos)) {
+ const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false;
}
@@ -119,22 +164,22 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
int writingPos = toPos;
int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) {
- int probability = NOT_A_PROBABILITY;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- sourceBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
- &targetTerminalId, &readingPos);
- if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ const BigramEntry originalBigramEntry =
+ sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = originalBigramEntry.hasNext();
+ if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
continue;
}
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
- terminalIdMap->find(targetTerminalId);
+ terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
if (it == terminalIdMap->end()) {
// Target word has been removed.
continue;
}
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
- if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
- &writingPos)) {
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
+ if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false;
}
@@ -142,10 +187,9 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
}
if (lastEntryPos != NOT_A_DICT_POS) {
// Update has next flag in the last written entry.
- int probability = NOT_A_PROBABILITY;
- int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
- getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos);
- if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) {
+ const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
+ false /* hasNext */);
+ if (!writeBigramEntry(&bigramEntry, writingPos)) {
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index cf380f403..ac05b215b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -18,6 +18,7 @@
#define LATINIME_BIGRAM_DICT_CONTENT_H
#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -26,27 +27,27 @@ namespace latinime {
class BigramDictContent : public SparseTableDictContent {
public:
- BigramDictContent(const char *const dictDirPath, const bool isUpdatable)
+ BigramDictContent(const char *const dictDirPath, const bool hasHistoricalInfo,
+ const bool isUpdatable)
: SparseTableDictContent(dictDirPath,
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+ mHasHistoricalInfo(hasHistoricalInfo) {}
- BigramDictContent()
+ BigramDictContent(const bool hasHistoricalInfo)
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
- Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
+ Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
+ mHasHistoricalInfo(hasHistoricalInfo) {}
- void getBigramEntry(int *const outProbability, bool *const outHasNext,
- int *const outTargetTerminalId, const int bigramEntryPos) const {
+ const BigramEntry getBigramEntry(const int bigramEntryPos) const {
int readingPos = bigramEntryPos;
- getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId,
- &readingPos);
+ return getBigramEntryAndAdvancePosition(&readingPos);
}
- void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
- int *const outTargetTerminalId, int *const bigramEntryPos) const;
+ const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
// Returns head position of bigram list for a PtNode specified by terminalId.
int getBigramListHeadPos(const int terminalId) const {
@@ -57,15 +58,13 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
- bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId,
- const int entryWritingPos) {
+ bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
int writingPos = entryWritingPos;
- return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
- &writingPos);
+ return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
}
- bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
- const int targetTerminalId, int *const entryWritingPos);
+ bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
+ int *const entryWritingPos);
bool createNewBigramList(const int terminalId) {
const int bigramListPos = getContentBuffer()->getTailPosition();
@@ -96,6 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
int *const outEntryCount);
+
+ bool mHasHistoricalInfo;
};
} // namespace latinime
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
new file mode 100644
index 000000000..10b3ec8dc
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_BIGRAM_ENTRY_H
+#define LATINIME_BIGRAM_ENTRY_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class BigramEntry {
+ public:
+ BigramEntry(const BigramEntry& bigramEntry)
+ : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
+ mTimestamp(bigramEntry.mTimestamp), mLevel(bigramEntry.mLevel),
+ mCount(bigramEntry.mCount), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
+
+ // Entry without historical information.
+ BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
+ : mHasNext(hasNext), mProbability(probability),
+ mTimestamp(Ver4DictConstants::NOT_A_TIME_STAMP), mLevel(0), mCount(0),
+ mTargetTerminalId(targetTerminalId) {}
+
+ // Entry with historical information.
+ BigramEntry(const bool hasNext, const int probability, const int timestamp, const int level,
+ const int count, const int targetTerminalId)
+ : mHasNext(hasNext), mProbability(probability), mTimestamp(timestamp),
+ mLevel(level), mCount(count), mTargetTerminalId(targetTerminalId) {}
+
+ const BigramEntry getInvalidatedEntry() const {
+ return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
+ }
+
+ const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
+ return BigramEntry(hasNext, mProbability, mTimestamp, mLevel, mCount,
+ mTargetTerminalId);
+ }
+
+ const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
+ return BigramEntry(mHasNext, mProbability, mTimestamp, mLevel, mCount,
+ newTargetTerminalId);
+ }
+
+ const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
+ return BigramEntry(mHasNext, probability, mTimestamp, mLevel, mCount,
+ mTargetTerminalId);
+ }
+
+ bool isValid() const {
+ return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
+ }
+
+ bool hasNext() const {
+ return mHasNext;
+ }
+
+ int getProbability() const {
+ return mProbability;
+ }
+
+ int getTimeStamp() const {
+ return mTimestamp;
+ }
+
+ int getLevel() const {
+ return mLevel;
+ }
+
+ int getCount() const {
+ return mCount;
+ }
+
+ int getTargetTerminalId() const {
+ return mTargetTerminalId;
+ }
+
+ private:
+ // Copy constructor is public to use this class as a type of return value.
+ DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
+ DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
+
+ const bool mHasNext;
+ const int mProbability;
+ const int mTimestamp;
+ const int mLevel;
+ const int mCount;
+ const int mTargetTerminalId;
+};
+} // namespace latinime
+#endif /* LATINIME_BIGRAM_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 8fdbbedfe..07b8f181d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -127,7 +127,7 @@ class Ver4DictBuffers {
// TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
- mBigramDictContent(dictDirPath, isUpdatable),
+ mBigramDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
@@ -137,7 +137,8 @@ class Ver4DictBuffers {
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(),
mProbabilityDictContent(false /* hasHistoricalInfo */),
- mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
+ mBigramDictContent(false /* hasHistoricalInfo */), mShortcutDictContent(),
+ mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
const int mHeaderSize;
diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h
index 3cf78954a..6c67df28e 100644
--- a/native/jni/src/utils/exclusive_ownership_pointer.h
+++ b/native/jni/src/utils/exclusive_ownership_pointer.h
@@ -56,8 +56,7 @@ class ExclusiveOwnershipPointer {
private:
// This class allows to copy and assign and ensures only one instance has the ownership of the
// managed pointer.
-
- ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {}
+ DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer);
void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
if (*mSharedOwnerPtr != src) {