aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp46
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp159
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h66
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h4
-rw-r--r--native/jni/src/utils/int_array_view.h19
23 files changed, 165 insertions, 351 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 4220a9561..278f2b199 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
&probabilityEntryToWrite);
}
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
}
- if (!sourcePtNodeParams->hasBigrams()) {
+ const int ptNodePos =
+ mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
+ const PtNodeParams sourcePtNodeParams =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag.
- return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
- sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
- sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
+ return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
+ sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
+ sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */,
- sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+ sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
}
return true;
}
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index 08226ea26..d49d9a666 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "utils/int_array_view.h"
namespace latinime {
namespace backward {
@@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
- mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
- mShortcutPolicy(shortcutPolicy) {}
+ mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram);
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
@@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
+ const PtNodeReader *const mPtNodeReader;
DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index baa0c0c37..1296b8acd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -309,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
- &addedNewBigram)) {
+ if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
+ word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -350,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+ if (mUpdatingHelper.removeNgramEntry(
+ PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
mBigramCount--;
return true;
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index f31c914d2..3c62e2e56 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
unigramProperty, &pos);
}
-bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
- bigramProperty, outAddedNewBigram);
+bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos, const BigramProperty *const bigramProperty,
+ bool *const outAddedNewEntry) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
+ const int wordId =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+ return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
}
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
+bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
+ const int wordId =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+ return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
}
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index f10d15a9b..97c05c1ea 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper {
const int *const wordCodePoints, const int codePointCount,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
+ // Add an n-gram entry.
+ bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
+ // Remove an n-gram entry.
+ bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
// Add a shortcut target.
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
index a8029f73f..955d779ac 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -70,12 +71,10 @@ class PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram) = 0;
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam) = 0;
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index ae5847568..5dc91ba10 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -30,13 +30,13 @@ bool LanguageModelDictContent::runGC(
0 /* nextLevelBitmapEntryIndex */, outNgramCount);
}
-ProbabilityEntry LanguageModelDictContent::getProbabilityEntry(
+ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
const WordIdArrayView prevWordIds, const int wordId) const {
- if (!prevWordIds.empty()) {
- // TODO: Read n-gram entry.
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return ProbabilityEntry();
}
- const TrieMap::Result result = mTrieMap.getRoot(wordId);
+ const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
if (!result.mIsValid) {
// Not found.
return ProbabilityEntry();
@@ -44,16 +44,15 @@ ProbabilityEntry LanguageModelDictContent::getProbabilityEntry(
return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
}
-bool LanguageModelDictContent::setProbabilityEntry(const WordIdArrayView prevWordIds,
+bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int terminalId, const ProbabilityEntry *const probabilityEntry) {
- if (!prevWordIds.empty()) {
- // TODO: Add n-gram entry.
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return false;
}
- return mTrieMap.putRoot(terminalId, probabilityEntry->encode(mHasHistoricalInfo));
+ return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
}
-
bool LanguageModelDictContent::runGCInner(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange,
@@ -81,4 +80,16 @@ bool LanguageModelDictContent::runGCInner(
return true;
}
+int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
+ int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
+ for (const int wordId : prevWordIds) {
+ const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
+ if (!result.mIsValid) {
+ return TrieMap::INVALID_INDEX;
+ }
+ bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
+ }
+ return bitmapEntryIndex;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 7f184f1d7..18f2e0170 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -53,9 +53,18 @@ class LanguageModelDictContent {
const LanguageModelDictContent *const originalContent,
int *const outNgramCount);
- ProbabilityEntry getProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId) const;
+ ProbabilityEntry getProbabilityEntry(const int wordId) const {
+ return getNgramProbabilityEntry(WordIdArrayView(), wordId);
+ }
+
+ bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
+ return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
+ }
- bool setProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
+ ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int wordId) const;
+
+ bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
const ProbabilityEntry *const probabilityEntry);
private:
@@ -67,6 +76,8 @@ class LanguageModelDictContent {
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
int *const outNgramCount);
+
+ int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
};
} // namespace latinime
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
deleted file mode 100644
index 2425b3b2f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
- if (terminalId < 0 || terminalId >= mSize) {
- // This method can be called with invalid terminal id during GC.
- return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
- }
- const BufferWithExtendableBuffer *const buffer = getBuffer();
- int entryPos = getEntryPos(terminalId);
- const int flags = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
- const int probability = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
- if (mHasHistoricalInfo) {
- const int timestamp = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
- const int level = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
- const int count = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
- const HistoricalInfo historicalInfo(timestamp, level, count);
- return ProbabilityEntry(flags, probability, &historicalInfo);
- } else {
- return ProbabilityEntry(flags, probability);
- }
-}
-
-bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
- const ProbabilityEntry *const probabilityEntry) {
- if (terminalId < 0) {
- return false;
- }
- const int entryPos = getEntryPos(terminalId);
- if (terminalId >= mSize) {
- ProbabilityEntry dummyEntry;
- // Write new entry.
- int writingPos = getBuffer()->getTailPosition();
- while (writingPos <= entryPos) {
- // Fulfilling with dummy entries until writingPos.
- if (!writeEntry(&dummyEntry, writingPos)) {
- AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
- return false;
- }
- writingPos += getEntrySize();
- mSize++;
- }
- }
- return writeEntry(probabilityEntry, entryPos);
-}
-
-bool ProbabilityDictContent::flushToFile(FILE *const file) const {
- if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
- ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
- for (int i = 0; i < mSize; ++i) {
- const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
- if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
- return false;
- }
- }
- return probabilityDictContentToWrite.flush(file);
- } else {
- return flush(file);
- }
-}
-
-bool ProbabilityDictContent::runGC(
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent) {
- mSize = 0;
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const ProbabilityEntry probabilityEntry =
- originalProbabilityDictContent->getProbabilityEntry(it->first);
- if (!setProbabilityEntry(it->second, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
- return false;
- }
- mSize++;
- }
- return true;
-}
-
-int ProbabilityDictContent::getEntrySize() const {
- if (mHasHistoricalInfo) {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE
- + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
- + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
- + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
- } else {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE;
- }
-}
-
-int ProbabilityDictContent::getEntryPos(const int terminalId) const {
- return terminalId * getEntrySize();
-}
-
-bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
- const int entryPos) {
- BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
- int writingPos = entryPos;
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
- AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
- AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (mHasHistoricalInfo) {
- const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
deleted file mode 100644
index 80e992c1c..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
-#define LATINIME_PROBABILITY_DICT_CONTENT_H
-
-#include <cstdint>
-#include <cstdio>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-class ProbabilityEntry;
-
-class ProbabilityDictContent : public SingleDictContent {
- public:
- ProbabilityDictContent(uint8_t *const buffer, const int bufferSize,
- const bool hasHistoricalInfo)
- : SingleDictContent(buffer, bufferSize),
- mHasHistoricalInfo(hasHistoricalInfo),
- mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
-
- ProbabilityDictContent(const bool hasHistoricalInfo)
- : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
-
- const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
-
- bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
-
- bool flushToFile(FILE *const file) const;
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
-
- int getEntrySize() const;
-
- int getEntryPos(const int terminalId) const;
-
- bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
-
- bool mHasHistoricalInfo;
- int mSize;
-};
-} // namespace latinime
-#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 125ae1711..3c8008dc4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -157,11 +157,6 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
AKLOGE("Terminal position lookup table cannot be written.");
return false;
}
- // Write probability dict content.
- if (!mProbabilityDictContent.flushToFile(file)) {
- AKLOGE("Probability dict content cannot be written.");
- return false;
- }
// Write language model content.
if (!mLanguageModelDictContent.save(file)) {
AKLOGE("Language model dict content cannot be written.");
@@ -196,10 +191,6 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
contentBufferSizes[
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
- mProbabilityDictContent(
- contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
- mHeaderPolicy.hasHistoricalInfoOfWords()),
mLanguageModelDictContent(
ReadWriteByteArrayView(
contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
@@ -216,7 +207,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
- mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index d524dd6b7..68027dcb8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -24,7 +24,6 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -53,7 +52,7 @@ class Ver4DictBuffers {
AK_FORCE_INLINE bool isNearSizeLimit() const {
return mExpandableTrieBuffer.isNearSizeLimit()
|| mTerminalPositionLookupTable.isNearSizeLimit()
- || mProbabilityDictContent.isNearSizeLimit()
+ || mLanguageModelDictContent.isNearSizeLimit()
|| mBigramDictContent.isNearSizeLimit()
|| mShortcutDictContent.isNearSizeLimit();
}
@@ -82,14 +81,6 @@ class Ver4DictBuffers {
return &mTerminalPositionLookupTable;
}
- AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
- return &mProbabilityDictContent;
- }
-
- AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
- return &mProbabilityDictContent;
- }
-
AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() {
return &mLanguageModelDictContent;
}
@@ -144,7 +135,6 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer mExpandableHeaderBuffer;
BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable;
- ProbabilityDictContent mProbabilityDictContent;
LanguageModelDictContent mLanguageModelDictContent;
BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index e7e31e96f..93d4e562d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -27,19 +27,18 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
// limited to 1MB to prevent from inefficient traversing.
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
-// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability.
+// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
+// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
- NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3
+ NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
+ NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
-const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX =
- TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
- PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
+ TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index e75db9f75..6950ca70f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -35,7 +35,6 @@ class Ver4DictConstants {
static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE;
static const int TRIE_BUFFER_INDEX;
static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX;
- static const int PROBABILITY_BUFFER_INDEX;
static const int LANGUAGE_MODEL_BUFFER_INDEX;
static const int BIGRAM_BUFFERS_INDEX;
static const int SHORTCUT_BUFFERS_INDEX;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 0a435e91c..731092efd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -18,7 +18,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -61,8 +61,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
}
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
+ // TODO: Quit reading probability here.
const ProbabilityEntry probabilityEntry =
- mProbabilityDictContent->getProbabilityEntry(terminalId);
+ mLanguageModelDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
index 22ed4a6c0..a91ad5728 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -25,18 +25,18 @@ namespace latinime {
class BufferWithExtendableBuffer;
class HeaderPolicy;
-class ProbabilityDictContent;
+class LanguageModelDictContent;
/*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
- * node and reads node attributes including probability form probabilityBuffer.
+ * node and reads node attributes including probability form language model.
*/
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const ProbabilityDictContent *const probabilityDictContent,
+ const LanguageModelDictContent *const languageModelDictContent,
const HeaderPolicy *const headerPolicy)
- : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
+ : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
mHeaderPolicy(headerPolicy) {}
~Ver4PatriciaTrieNodeReader() {}
@@ -50,7 +50,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
- const ProbabilityDictContent *const mProbabilityDictContent;
+ const LanguageModelDictContent *const mLanguageModelDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 3d8da9173..857222f5d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -143,11 +143,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
return false;
}
const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
unigramProperty);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
@@ -158,14 +158,14 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
return false;
}
const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
- if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
@@ -218,26 +218,23 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, unigramProperty);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
- &probabilityEntryToWrite);
+ return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
+ terminalId, &probabilityEntryToWrite);
}
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
- sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+ prevWordIds[0], wordId);
return false;
}
return true;
}
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 162dc9b1d..6703dba04 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram);
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 4bf8050e1..723808399 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -292,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSearch */);
+ const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
@@ -319,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
- bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
- &addedNewBigram)) {
- if (addedNewBigram) {
+ bool addedNewEntry = false;
+ if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
+ &addedNewEntry)) {
+ if (addedNewEntry) {
mBigramCount++;
}
return true;
@@ -352,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSerch */);
+ const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false;
@@ -361,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+ if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
mBigramCount--;
return true;
} else {
@@ -452,7 +454,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 76b3404c4..faad4290d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -46,7 +46,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
- mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
+ mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 0e658f8e3..4220312e0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -75,7 +75,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
- mBuffers->getProbabilityDictContent(), headerPolicy);
+ mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
@@ -138,7 +138,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
- buffersToWrite->getProbabilityDictContent(), headerPolicy);
+ buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
@@ -154,8 +154,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false;
}
// Run GC for probability dict content.
- if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
- mBuffers->getProbabilityDictContent())) {
+ if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
+ mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
return false;
}
// Run GC for bigram dict content.
@@ -201,7 +201,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
continue;
}
const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i);
const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
index a294ab876..3e5c4010c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -169,6 +169,10 @@ class TrieMap {
return mBuffer.isNearSizeLimit();
}
+ int getRootBitmapEntryIndex() const {
+ return ROOT_BITMAP_ENTRY_INDEX;
+ }
+
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
int getNextLevelBitmapEntryIndex(const int key) {
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
index 4bc2487ae..c1ddc9812 100644
--- a/native/jni/src/utils/int_array_view.h
+++ b/native/jni/src/utils/int_array_view.h
@@ -56,6 +56,16 @@ class IntArrayView {
explicit IntArrayView(const std::vector<int> &vector)
: mPtr(vector.data()), mSize(vector.size()) {}
+ template <int N>
+ AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
+ return IntArrayView(array, N);
+ }
+
+ // Returns a view that points one int object. Does not take ownership of the given object.
+ AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
+ return IntArrayView(object, 1);
+ }
+
AK_FORCE_INLINE int operator[](const size_t index) const {
ASSERT(index < mSize);
return mPtr[index];
@@ -73,6 +83,14 @@ class IntArrayView {
return mPtr;
}
+ AK_FORCE_INLINE const int *begin() const {
+ return mPtr;
+ }
+
+ AK_FORCE_INLINE const int *end() const {
+ return mPtr + mSize;
+ }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
@@ -81,6 +99,7 @@ class IntArrayView {
};
using WordIdArrayView = IntArrayView;
+using PtNodePosArrayView = IntArrayView;
} // namespace latinime
#endif // LATINIME_MEMORY_VIEW_H