aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp71
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h67
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp19
3 files changed, 98 insertions, 59 deletions
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
index b1d2f4b4d..49d82e69a 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp
@@ -30,4 +30,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25;
// Most common previous word contexts currently have 100 bigrams
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
+// Look up the bigram probability for the given word pair from the cached bigram maps.
+// Also caches the bigrams if there is space remaining and they have not been cached already.
+int MultiBigramMap::getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int wordPosition, const int nextWordPosition, const int unigramProbability) {
+ hash_map_compat<int, BigramMap>::const_iterator mapPosition =
+ mBigramMaps.find(wordPosition);
+ if (mapPosition != mBigramMaps.end()) {
+ return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
+ unigramProbability);
+ }
+ if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
+ addBigramsForWordPosition(structurePolicy, wordPosition);
+ return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
+ nextWordPosition, unigramProbability);
+ }
+ return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
+ nextWordPosition, unigramProbability);
+}
+
+void MultiBigramMap::BigramMap::init(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
+ continue;
+ }
+ mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
+ mBloomFilter.setInFilter(bigramsIt.getBigramPos());
+ }
+}
+
+int MultiBigramMap::BigramMap::getBigramProbability(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy,
+ const int nextWordPosition, const int unigramProbability) const {
+ int bigramProbability = NOT_A_PROBABILITY;
+ if (mBloomFilter.isInFilter(nextWordPosition)) {
+ const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
+ mBigramMap.find(nextWordPosition);
+ if (bigramProbabilityIt != mBigramMap.end()) {
+ bigramProbability = bigramProbabilityIt->second;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+}
+
+void MultiBigramMap::addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
+ mBigramMaps[position].init(structurePolicy, position);
+}
+
+int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
+ const int nextWordPosition, const int unigramProbability) {
+ int bigramProbability = NOT_A_PROBABILITY;
+ const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
+ BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
+ bigramsListPos);
+ while (bigramsIt.hasNext()) {
+ bigramsIt.next();
+ if (bigramsIt.getBigramPos() == nextWordPosition) {
+ bigramProbability = bigramsIt.getProbability();
+ break;
+ }
+ }
+ return structurePolicy->getProbability(unigramProbability, bigramProbability);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index 4633c07b0..421b2681c 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -38,21 +38,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int wordPosition, const int nextWordPosition, const int unigramProbability) {
- hash_map_compat<int, BigramMap>::const_iterator mapPosition =
- mBigramMaps.find(wordPosition);
- if (mapPosition != mBigramMaps.end()) {
- return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
- unigramProbability);
- }
- if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
- addBigramsForWordPosition(structurePolicy, wordPosition);
- return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
- nextWordPosition, unigramProbability);
- }
- return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
- nextWordPosition, unigramProbability);
- }
+ const int wordPosition, const int nextWordPosition, const int unigramProbability);
void clear() {
mBigramMaps.clear();
@@ -67,33 +53,11 @@ class MultiBigramMap {
~BigramMap() {}
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nodePos) {
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
- continue;
- }
- mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
- mBloomFilter.setInFilter(bigramsIt.getBigramPos());
- }
- }
+ const int nodePos);
- AK_FORCE_INLINE int getBigramProbability(
+ int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
- const int nextWordPosition, const int unigramProbability) const {
- int bigramProbability = NOT_A_PROBABILITY;
- if (mBloomFilter.isInFilter(nextWordPosition)) {
- const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
- mBigramMap.find(nextWordPosition);
- if (bigramProbabilityIt != mBigramMap.end()) {
- bigramProbability = bigramProbabilityIt->second;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
- }
+ const int nextWordPosition, const int unigramProbability) const;
private:
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
@@ -103,27 +67,12 @@ class MultiBigramMap {
BloomFilter mBloomFilter;
};
- AK_FORCE_INLINE void addBigramsForWordPosition(
- const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
- mBigramMaps[position].init(structurePolicy, position);
- }
+ void addBigramsForWordPosition(
+ const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
- AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
+ int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
- const int nextWordPosition, const int unigramProbability) {
- int bigramProbability = NOT_A_PROBABILITY;
- const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
- BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
- bigramsListPos);
- while (bigramsIt.hasNext()) {
- bigramsIt.next();
- if (bigramsIt.getBigramPos() == nextWordPosition) {
- bigramProbability = bigramsIt.getProbability();
- break;
- }
- }
- return structurePolicy->getProbability(unigramProbability, bigramProbability);
- }
+ const int nextWordPosition, const int unigramProbability);
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
hash_map_compat<int, BigramMap> mBigramMaps;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
index 4cd96722e..431f342ae 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -46,6 +46,7 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability
const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
const int targetTerminalIdToWrite =
@@ -66,6 +67,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos)
&readingPos);
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
&writingPos)) {
+ AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false;
}
}
@@ -88,6 +90,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
// Copy bigram list with GC from original content.
if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
terminalIdMap, &bigramEntryCount)) {
+ AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+ originalBigramListPos, bigramListPos);
return false;
}
if (bigramEntryCount == 0) {
@@ -97,6 +101,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
*outBigramEntryCount += bigramEntryCount;
// Set bigram list position to the lookup table.
if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
+ AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+ it->second, bigramListPos);
return false;
}
}
@@ -111,6 +117,7 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
+ int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) {
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
@@ -125,12 +132,24 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
// Target word has been removed.
continue;
}
+ lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
&writingPos)) {
+ AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false;
}
*outEntrycount += 1;
}
+ if (lastEntryPos != NOT_A_DICT_POS) {
+ // Update has next flag in the last written entry.
+ int probability = NOT_A_PROBABILITY;
+ int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
+ getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos);
+ if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) {
+ AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
+ return false;
+ }
+ }
return true;
}