aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/NativeFileList.mk4
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp46
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h9
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp13
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h35
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp95
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h83
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp159
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h66
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h52
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h19
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp40
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp24
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h7
-rw-r--r--native/jni/src/utils/byte_array_view.h5
-rw-r--r--native/jni/src/utils/int_array_view.h25
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp59
-rw-r--r--native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp60
-rw-r--r--native/jni/tests/utils/int_array_view_test.cpp33
45 files changed, 647 insertions, 487 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk
index 09777497b..7a732a588 100644
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@@ -72,7 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \
ver4_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
bigram_dict_content.cpp \
- probability_dict_content.cpp \
+ language_model_dict_content.cpp \
shortcut_dict_content.cpp \
sparse_table_dict_content.cpp \
terminal_position_lookup_table.cpp) \
@@ -125,6 +125,8 @@ LATIN_IME_CORE_TEST_FILES := \
defines_test.cpp \
suggest/core/layout/normal_distribution_2d_test.cpp \
suggest/core/dictionary/bloom_filter_test.cpp \
+ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
+ suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
suggest/policyimpl/dictionary/utils/trie_map_test.cpp \
utils/autocorrection_threshold_utils_test.cpp \
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 7e3bf3ff6..e91f07682 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -20,14 +20,12 @@
#include <memory>
#include "defines.h"
-#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/property/word_property.h"
namespace latinime {
class DicNode;
class DicNodeVector;
-class DictionaryBigramsStructurePolicy;
class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
class NgramListener;
@@ -67,8 +65,6 @@ class DictionaryStructureWithBufferPolicy {
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
- virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
-
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
index 6433650b0..49f446814 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h
@@ -30,6 +30,7 @@
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
namespace backward {
@@ -40,8 +41,9 @@ class SingleDictContent : public DictContent {
SingleDictContent(const char *const dictPath, const char *const contentFileName,
const bool isUpdatable)
: mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
- mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
- mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
+ mExpandableContentBuffer(
+ mMmappedBuffer ? mMmappedBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mIsValid(mMmappedBuffer) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
index c7233edd3..3c626df11 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h
@@ -31,6 +31,7 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+#include "utils/byte_array_view.h"
namespace latinime {
namespace backward {
@@ -50,15 +51,16 @@ class SparseTableDictContent : public DictContent {
mContentBuffer(
MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
mExpandableLookupTableBuffer(
- mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
- mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
+ mLookupTableBuffer ? mLookupTableBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableAddressTableBuffer(
- mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
- mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
+ mAddressTableBuffer ? mAddressTableBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
- mContentBuffer ? mContentBuffer->getBufferSize() : 0,
+ mExpandableContentBuffer(
+ mContentBuffer ? mContentBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
index 93f192976..3dfbd1c94 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp
@@ -30,6 +30,7 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
namespace backward {
@@ -130,12 +131,12 @@ Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
: mHeaderBuffer(std::move(headerBuffer)),
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
- mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
- mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
- mHeaderPolicy.getSize(),
+ mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
+ mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr,
- mDictBuffer ? mDictBuffer->getBufferSize() : 0,
+ mExpandableTrieBuffer(
+ mDictBuffer ? mDictBuffer->getReadWriteByteArrayView() :
+ ReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(dictPath, isUpdatable),
mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
index 4220a9561..278f2b199 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp
@@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
&probabilityEntryToWrite);
}
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
}
- if (!sourcePtNodeParams->hasBigrams()) {
+ const int ptNodePos =
+ mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
+ const PtNodeParams sourcePtNodeParams =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag.
- return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
- sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
- sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
+ return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
+ sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
+ sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */,
- sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
+ sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
}
return true;
}
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
index 08226ea26..d49d9a666 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h
@@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
+#include "utils/int_array_view.h"
namespace latinime {
namespace backward {
@@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
- mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
- mShortcutPolicy(shortcutPolicy) {}
+ mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
+ mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram);
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
@@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
+ const PtNodeReader *const mPtNodeReader;
DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 994c42505..1296b8acd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -142,8 +142,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == ptNodePos
@@ -161,7 +161,8 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod
if (!prevWordsPtNodePos) {
return;
}
- BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
@@ -180,12 +181,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
-BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
- const int ptNodePos) const {
- const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
- return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
-}
-
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
@@ -314,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false;
}
bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
- &addedNewBigram)) {
+ if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
+ word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
@@ -355,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+ if (mUpdatingHelper.removeNgramEntry(
+ PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
mBigramCount--;
return true;
} else {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index ff69de7c0..9e989b268 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -97,8 +97,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
- BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
-
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index e4b5fa267..e4ea3da16 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -31,6 +31,7 @@
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -110,7 +111,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
return nullptr;
}
const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion(
- mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize());
+ mmappedBuffer->getReadOnlyByteArrayView().data(),
+ mmappedBuffer->getReadOnlyByteArrayView().size());
switch (formatVersion) {
case FormatUtils::VERSION_2:
AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
@@ -172,8 +174,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
if (!mmappedBuffer) {
return nullptr;
}
- switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(),
- mmappedBuffer->getBufferSize())) {
+ switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView().data(),
+ mmappedBuffer->getReadOnlyByteArrayView().size())) {
case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new PatriciaTriePolicy(std::move(mmappedBuffer)));
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
index 2e05bf397..b7262581a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h
@@ -26,7 +26,6 @@
namespace latinime {
-class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy;
class PtNodeArrayReader;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index f31c914d2..3c62e2e56 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
unigramProperty, &pos);
}
-bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
- bigramProperty, outAddedNewBigram);
+bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos, const BigramProperty *const bigramProperty,
+ bool *const outAddedNewEntry) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
+ const int wordId =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+ return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
}
-// Remove a bigram relation from word0Pos to word1Pos.
-bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
- const PtNodeParams sourcePtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
- const PtNodeParams targetPtNodeParams(
- mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
+bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
+ const int wordId =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
+ return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
}
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index f10d15a9b..97c05c1ea 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper {
const int *const wordCodePoints, const int codePointCount,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
- // Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos,
- const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
+ // Add an n-gram entry.
+ bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- // Remove a bigram relation from word0Pos to word1Pos.
- bool removeBigramWords(const int word0Pos, const int word1Pos);
+ // Remove an n-gram entry.
+ bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
// Add a shortcut target.
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
index a8029f73f..955d779ac 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
+#include "utils/int_array_view.h"
namespace latinime {
@@ -70,12 +71,10 @@ class PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram) = 0;
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam) = 0;
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 53415aeb6..ea32eb2a9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -311,8 +311,8 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodeP
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == ptNodePos
@@ -330,7 +330,8 @@ void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos
if (!prevWordsPtNodePos) {
return;
}
- BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
@@ -344,12 +345,6 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}
-BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode(
- const int ptNodePos) const {
- const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
- return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition);
-}
-
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 07cb72b23..70351d147 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -39,9 +40,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
: mMmappedBuffer(std::move(mmappedBuffer)),
- mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
- mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
+ FormatUtils::VERSION_2),
+ mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
+ + mHeaderPolicy.getSize()),
+ mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
+ - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
@@ -70,8 +74,6 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
- BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
-
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
deleted file mode 100644
index c264aeac4..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_DICT_CONTENT_H
-#define LATINIME_DICT_CONTENT_H
-
-#include "defines.h"
-
-namespace latinime {
-
-class DictContent {
- public:
- virtual ~DictContent() {}
-
- protected:
- DictContent() {}
-
- private:
- DISALLOW_COPY_AND_ASSIGN(DictContent);
-};
-} // namespace latinime
-#endif /* LATINIME_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
new file mode 100644
index 000000000..5dc91ba10
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+
+namespace latinime {
+
+bool LanguageModelDictContent::save(FILE *const file) const {
+ return mTrieMap.save(file);
+}
+
+bool LanguageModelDictContent::runGC(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const LanguageModelDictContent *const originalContent,
+ int *const outNgramCount) {
+ return runGCInner(terminalIdMap, originalContent->mTrieMap.getEntriesInRootLevel(),
+ 0 /* nextLevelBitmapEntryIndex */, outNgramCount);
+}
+
+ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
+ const WordIdArrayView prevWordIds, const int wordId) const {
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
+ return ProbabilityEntry();
+ }
+ const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
+ if (!result.mIsValid) {
+ // Not found.
+ return ProbabilityEntry();
+ }
+ return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
+}
+
+bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int terminalId, const ProbabilityEntry *const probabilityEntry) {
+ const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
+ if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
+ return false;
+ }
+ return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
+}
+
+bool LanguageModelDictContent::runGCInner(
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const TrieMap::TrieMapRange trieMapRange,
+ const int nextLevelBitmapEntryIndex, int *const outNgramCount) {
+ for (auto &entry : trieMapRange) {
+ const auto it = terminalIdMap->find(entry.key());
+ if (it == terminalIdMap->end() || it->second == Ver4DictConstants::NOT_A_TERMINAL_ID) {
+ // The word has been removed.
+ continue;
+ }
+ if (!mTrieMap.put(it->second, entry.value(), nextLevelBitmapEntryIndex)) {
+ return false;
+ }
+ if (outNgramCount) {
+ *outNgramCount += 1;
+ }
+ if (entry.hasNextLevelMap()) {
+ if (!runGCInner(terminalIdMap, entry.getEntriesInNextLevel(),
+ mTrieMap.getNextLevelBitmapEntryIndex(it->second, nextLevelBitmapEntryIndex),
+ outNgramCount)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
+ int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
+ for (const int wordId : prevWordIds) {
+ const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
+ if (!result.mIsValid) {
+ return TrieMap::INVALID_INDEX;
+ }
+ bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
+ }
+ return bitmapEntryIndex;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
new file mode 100644
index 000000000..18f2e0170
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
+#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H
+
+#include <cstdio>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "suggest/policyimpl/dictionary/utils/trie_map.h"
+#include "utils/byte_array_view.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+
+/**
+ * Class representing language model.
+ *
+ * This class provides methods to get and store unigram/n-gram probability information and flags.
+ */
+class LanguageModelDictContent {
+ public:
+ LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
+ const bool hasHistoricalInfo)
+ : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ explicit LanguageModelDictContent(const bool hasHistoricalInfo)
+ : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
+
+ bool isNearSizeLimit() const {
+ return mTrieMap.isNearSizeLimit();
+ }
+
+ bool save(FILE *const file) const;
+
+ bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const LanguageModelDictContent *const originalContent,
+ int *const outNgramCount);
+
+ ProbabilityEntry getProbabilityEntry(const int wordId) const {
+ return getNgramProbabilityEntry(WordIdArrayView(), wordId);
+ }
+
+ bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
+ return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
+ }
+
+ ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
+ const int wordId) const;
+
+ bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const ProbabilityEntry *const probabilityEntry);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
+
+ TrieMap mTrieMap;
+ const bool mHasHistoricalInfo;
+
+ bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
+ int *const outNgramCount);
+
+ int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
+};
+} // namespace latinime
+#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
deleted file mode 100644
index 2425b3b2f..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
- if (terminalId < 0 || terminalId >= mSize) {
- // This method can be called with invalid terminal id during GC.
- return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
- }
- const BufferWithExtendableBuffer *const buffer = getBuffer();
- int entryPos = getEntryPos(terminalId);
- const int flags = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
- const int probability = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
- if (mHasHistoricalInfo) {
- const int timestamp = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
- const int level = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
- const int count = buffer->readUintAndAdvancePosition(
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
- const HistoricalInfo historicalInfo(timestamp, level, count);
- return ProbabilityEntry(flags, probability, &historicalInfo);
- } else {
- return ProbabilityEntry(flags, probability);
- }
-}
-
-bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
- const ProbabilityEntry *const probabilityEntry) {
- if (terminalId < 0) {
- return false;
- }
- const int entryPos = getEntryPos(terminalId);
- if (terminalId >= mSize) {
- ProbabilityEntry dummyEntry;
- // Write new entry.
- int writingPos = getBuffer()->getTailPosition();
- while (writingPos <= entryPos) {
- // Fulfilling with dummy entries until writingPos.
- if (!writeEntry(&dummyEntry, writingPos)) {
- AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
- return false;
- }
- writingPos += getEntrySize();
- mSize++;
- }
- }
- return writeEntry(probabilityEntry, entryPos);
-}
-
-bool ProbabilityDictContent::flushToFile(FILE *const file) const {
- if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
- ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
- for (int i = 0; i < mSize; ++i) {
- const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
- if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
- return false;
- }
- }
- return probabilityDictContentToWrite.flush(file);
- } else {
- return flush(file);
- }
-}
-
-bool ProbabilityDictContent::runGC(
- const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent) {
- mSize = 0;
- for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
- it != terminalIdMap->end(); ++it) {
- const ProbabilityEntry probabilityEntry =
- originalProbabilityDictContent->getProbabilityEntry(it->first);
- if (!setProbabilityEntry(it->second, &probabilityEntry)) {
- AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
- return false;
- }
- mSize++;
- }
- return true;
-}
-
-int ProbabilityDictContent::getEntrySize() const {
- if (mHasHistoricalInfo) {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE
- + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
- + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
- + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
- } else {
- return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
- + Ver4DictConstants::PROBABILITY_SIZE;
- }
-}
-
-int ProbabilityDictContent::getEntryPos(const int terminalId) const {
- return terminalId * getEntrySize();
-}
-
-bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
- const int entryPos) {
- BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
- int writingPos = entryPos;
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
- Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
- AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
- Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
- AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (mHasHistoricalInfo) {
- const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
- Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
- Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
- return false;
- }
- if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
- Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
- AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
- return false;
- }
- }
- return true;
-}
-
-} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
deleted file mode 100644
index 80e992c1c..000000000
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2013, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
-#define LATINIME_PROBABILITY_DICT_CONTENT_H
-
-#include <cstdint>
-#include <cstdio>
-
-#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
-#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
-
-namespace latinime {
-
-class ProbabilityEntry;
-
-class ProbabilityDictContent : public SingleDictContent {
- public:
- ProbabilityDictContent(uint8_t *const buffer, const int bufferSize,
- const bool hasHistoricalInfo)
- : SingleDictContent(buffer, bufferSize),
- mHasHistoricalInfo(hasHistoricalInfo),
- mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
-
- ProbabilityDictContent(const bool hasHistoricalInfo)
- : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
-
- const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
-
- bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
-
- bool flushToFile(FILE *const file) const;
-
- bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
- const ProbabilityDictContent *const originalProbabilityDictContent);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
-
- int getEntrySize() const;
-
- int getEntryPos(const int terminalId) const;
-
- bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
-
- bool mHasHistoricalInfo;
- int mSize;
-};
-} // namespace latinime
-#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
index 36ba82be1..feff6b57f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
@@ -17,6 +17,9 @@
#ifndef LATINIME_PROBABILITY_ENTRY_H
#define LATINIME_PROBABILITY_ENTRY_H
+#include <climits>
+#include <cstdint>
+
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
@@ -67,6 +70,50 @@ class ProbabilityEntry {
return &mHistoricalInfo;
}
+ uint64_t encode(const bool hasHistoricalInfo) const {
+ uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
+ if (hasHistoricalInfo) {
+ encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp());
+ encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
+ encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mHistoricalInfo.getCount());
+ } else {
+ encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
+ ^ static_cast<uint64_t>(mProbability);
+ }
+ return encodedEntry;
+ }
+
+ static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
+ if (hasHistoricalInfo) {
+ const int flags = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int timestamp = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int level = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
+ const int count = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ return ProbabilityEntry(flags, NOT_A_PROBABILITY, &historicalInfo);
+ } else {
+ const int flags = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE,
+ Ver4DictConstants::PROBABILITY_SIZE);
+ const int probability = readFromEncodedEntry(encodedEntry,
+ Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
+ return ProbabilityEntry(flags, probability);
+ }
+ }
+
private:
// Copy constructor is public to use this class as a type of return value.
DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
@@ -74,6 +121,11 @@ class ProbabilityEntry {
const int mFlags;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
+
+ static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
+ return static_cast<int>(
+ (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
+ }
};
} // namespace latinime
#endif /* LATINIME_PROBABILITY_ENTRY_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
index 69a11425f..921774181 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
@@ -21,17 +21,17 @@
#include <cstdio>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
-class SingleDictContent : public DictContent {
+class SingleDictContent {
public:
SingleDictContent(uint8_t *const buffer, const int bufferSize)
- : mExpandableContentBuffer(buffer, bufferSize,
+ : mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
SingleDictContent()
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index cdf870bd2..c98dd11fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -21,26 +21,29 @@
#include <cstdio>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+#include "utils/byte_array_view.h"
namespace latinime {
// TODO: Support multiple contents.
-class SparseTableDictContent : public DictContent {
+class SparseTableDictContent {
public:
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
const int sparseTableBlockSize, const int sparseTableDataSize)
- : mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
- bufferSizes[LOOKUP_TABLE_BUFFER_INDEX],
+ : mExpandableLookupTableBuffer(
+ ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
+ bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
- bufferSizes[ADDRESS_TABLE_BUFFER_INDEX],
+ mExpandableAddressTableBuffer(
+ ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
+ bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
- bufferSizes[CONTENT_BUFFER_INDEX],
+ mExpandableContentBuffer(
+ ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
+ bufferSizes[CONTENT_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index ac9540309..3c8008dc4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -26,6 +26,7 @@
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -46,14 +47,16 @@ namespace latinime {
}
std::vector<uint8_t *> buffers;
std::vector<int> bufferSizes;
- uint8_t *const buffer = bodyBuffer->getBuffer();
+ const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
int position = 0;
- while (position < bodyBuffer->getBufferSize()) {
- const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(buffer, &position);
- buffers.push_back(buffer + position);
- bufferSizes.push_back(bufferSize);
+ while (position < static_cast<int>(buffer.size())) {
+ const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
+ buffer.data(), &position);
+ const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
+ buffers.push_back(subBuffer.data());
+ bufferSizes.push_back(subBuffer.size());
position += bufferSize;
- if (bufferSize < 0 || position < 0 || position > bodyBuffer->getBufferSize()) {
+ if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
AKLOGE("The dict body file is corrupted.");
return Ver4DictBuffersPtr(nullptr);
}
@@ -154,9 +157,9 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
AKLOGE("Terminal position lookup table cannot be written.");
return false;
}
- // Write probability dict content.
- if (!mProbabilityDictContent.flushToFile(file)) {
- AKLOGE("Probability dict content cannot be written.");
+ // Write language model content.
+ if (!mLanguageModelDictContent.save(file)) {
+ AKLOGE("Language model dict content cannot be written.");
return false;
}
// Write bigram dict content.
@@ -177,20 +180,21 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
const FormatUtils::FORMAT_VERSION formatVersion,
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
- mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
- mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
- mHeaderPolicy.getSize(),
+ mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
+ mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
- mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX],
+ mExpandableTrieBuffer(
+ ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
+ contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
contentBufferSizes[
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
- mProbabilityDictContent(
- contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
- contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
+ mLanguageModelDictContent(
+ ReadWriteByteArrayView(
+ contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
+ contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
mHeaderPolicy.hasHistoricalInfoOfWords()),
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
@@ -203,7 +207,7 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
- mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
+ mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 433411cb8..68027dcb8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -23,7 +23,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -52,7 +52,7 @@ class Ver4DictBuffers {
AK_FORCE_INLINE bool isNearSizeLimit() const {
return mExpandableTrieBuffer.isNearSizeLimit()
|| mTerminalPositionLookupTable.isNearSizeLimit()
- || mProbabilityDictContent.isNearSizeLimit()
+ || mLanguageModelDictContent.isNearSizeLimit()
|| mBigramDictContent.isNearSizeLimit()
|| mShortcutDictContent.isNearSizeLimit();
}
@@ -81,12 +81,12 @@ class Ver4DictBuffers {
return &mTerminalPositionLookupTable;
}
- AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
- return &mProbabilityDictContent;
+ AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() {
+ return &mLanguageModelDictContent;
}
- AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
- return &mProbabilityDictContent;
+ AK_FORCE_INLINE const LanguageModelDictContent *getLanguageModelDictContent() const {
+ return &mLanguageModelDictContent;
}
AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
@@ -135,7 +135,7 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer mExpandableHeaderBuffer;
BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable;
- ProbabilityDictContent mProbabilityDictContent;
+ LanguageModelDictContent mLanguageModelDictContent;
BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent;
const int mIsUpdatable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index d45dfe377..93d4e562d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -27,18 +27,20 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
// limited to 1MB to prevent from inefficient traversing.
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
-// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability.
+// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
+// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
- NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3
+ NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
+ + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
-const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX =
+const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
- PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
+ LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
@@ -73,5 +75,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
+const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index e8f6739ba..6950ca70f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -35,7 +35,7 @@ class Ver4DictConstants {
static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE;
static const int TRIE_BUFFER_INDEX;
static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX;
- static const int PROBABILITY_BUFFER_INDEX;
+ static const int LANGUAGE_MODEL_BUFFER_INDEX;
static const int BIGRAM_BUFFERS_INDEX;
static const int SHORTCUT_BUFFERS_INDEX;
@@ -71,6 +71,7 @@ class Ver4DictConstants {
static const size_t NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
static const size_t NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
+ static const size_t NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
};
} // namespace latinime
#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 0a435e91c..731092efd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -18,7 +18,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -61,8 +61,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
}
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
+ // TODO: Quit reading probability here.
const ProbabilityEntry probabilityEntry =
- mProbabilityDictContent->getProbabilityEntry(terminalId);
+ mLanguageModelDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
index 22ed4a6c0..a91ad5728 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
@@ -25,18 +25,18 @@ namespace latinime {
class BufferWithExtendableBuffer;
class HeaderPolicy;
-class ProbabilityDictContent;
+class LanguageModelDictContent;
/*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
- * node and reads node attributes including probability form probabilityBuffer.
+ * node and reads node attributes including probability form language model.
*/
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
- const ProbabilityDictContent *const probabilityDictContent,
+ const LanguageModelDictContent *const languageModelDictContent,
const HeaderPolicy *const headerPolicy)
- : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
+ : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
mHeaderPolicy(headerPolicy) {}
~Ver4PatriciaTrieNodeReader() {}
@@ -50,7 +50,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
- const ProbabilityDictContent *const mProbabilityDictContent;
+ const LanguageModelDictContent *const mLanguageModelDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index 3d8da9173..857222f5d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -143,11 +143,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
return false;
}
const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
unigramProperty);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
@@ -158,14 +158,14 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
return false;
}
const ProbabilityEntry originalProbabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
- if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
+ if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
@@ -218,26 +218,23 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, unigramProperty);
- return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
- &probabilityEntryToWrite);
+ return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
+ terminalId, &probabilityEntryToWrite);
}
-bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
- if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
+ if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
- sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
+ prevWordIds[0], wordId);
return false;
}
return true;
}
-bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
- const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
- return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId());
+bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
+ const int wordId) {
+ return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index 162dc9b1d..6703dba04 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
- virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
- bool *const outAddedNewBigram);
+ virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
- virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam);
+ virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 22f7e1182..723808399 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -132,8 +132,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN
return NOT_A_PROBABILITY;
}
if (prevWordsPtNodePos) {
- BinaryDictionaryBigramsIterator bigramsIt =
- getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == ptNodePos
@@ -151,7 +151,8 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod
if (!prevWordsPtNodePos) {
return;
}
- BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
+ const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
+ BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
while (bigramsIt.hasNext()) {
bigramsIt.next();
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
@@ -170,12 +171,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
-BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
- const int ptNodePos) const {
- const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
- return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
-}
-
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
@@ -297,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSearch */);
+ const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
@@ -324,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
- bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
- &addedNewBigram)) {
- if (addedNewBigram) {
+ bool addedNewEntry = false;
+ if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
+ &addedNewEntry)) {
+ if (addedNewEntry) {
mBigramCount++;
}
return true;
@@ -357,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSerch */);
+ const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false;
@@ -366,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) {
return false;
}
- if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
+ if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
mBigramCount--;
return true;
} else {
@@ -457,7 +454,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index c5b6a80c0..faad4290d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -46,7 +46,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
- mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
+ mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
@@ -79,8 +79,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const;
- BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
-
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 0e658f8e3..4220312e0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -75,7 +75,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
- mBuffers->getProbabilityDictContent(), headerPolicy);
+ mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
@@ -138,7 +138,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
- buffersToWrite->getProbabilityDictContent(), headerPolicy);
+ buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
@@ -154,8 +154,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false;
}
// Run GC for probability dict content.
- if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
- mBuffers->getProbabilityDictContent())) {
+ if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
+ mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
return false;
}
// Run GC for bigram dict content.
@@ -201,7 +201,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
continue;
}
const ProbabilityEntry probabilityEntry =
- mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
+ mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i);
const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 825b72c6a..833063c17 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -25,7 +25,7 @@ const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 12
uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
- const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
+ const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBuffer.size() : pos;
return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
}
@@ -40,12 +40,12 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC
int *const outCodePoints, int *outCodePointCount, int *const pos) const {
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
if (readingPosIsInAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
+ *pos -= mOriginalBuffer.size();
}
*outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
if (readingPosIsInAdditionalBuffer) {
- *pos += mOriginalBufferSize;
+ *pos += mOriginalBuffer.size();
}
}
@@ -69,13 +69,14 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
return false;
}
const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
- uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ uint8_t *const buffer =
+ usesAdditionalBuffer ? mAdditionalBuffer.data() : mOriginalBuffer.data();
if (usesAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
+ *pos -= mOriginalBuffer.size();
}
ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos);
if (usesAdditionalBuffer) {
- *pos += mOriginalBufferSize;
+ *pos += mOriginalBuffer.size();
}
return true;
}
@@ -88,14 +89,15 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return false;
}
const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos);
- uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer;
+ uint8_t *const buffer =
+ usesAdditionalBuffer ? mAdditionalBuffer.data() : mOriginalBuffer.data();
if (usesAdditionalBuffer) {
- *pos -= mOriginalBufferSize;
+ *pos -= mOriginalBuffer.size();
}
ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePoints, codePointCount,
writesTerminator, pos);
if (usesAdditionalBuffer) {
- *pos += mOriginalBufferSize;
+ *pos += mOriginalBuffer.size();
}
return true;
}
@@ -119,7 +121,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
const size_t totalRequiredSize = static_cast<size_t>(pos + size);
if (!isInAdditionalBuffer(pos)) {
// Here don't need to care about the additional buffer.
- if (static_cast<size_t>(mOriginalBufferSize) < totalRequiredSize) {
+ if (mOriginalBuffer.size() < totalRequiredSize) {
// Violate the boundary.
return false;
}
@@ -137,7 +139,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return false;
}
const size_t extendSize = totalRequiredSize -
- std::min(mAdditionalBuffer.size() + mOriginalBufferSize, totalRequiredSize);
+ std::min(mAdditionalBuffer.size() + mOriginalBuffer.size(), totalRequiredSize);
if (extendSize > 0 && !extendBuffer(extendSize)) {
// Failed to extend the buffer.
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 5e1362eee..fad83aa25 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -23,6 +23,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -34,20 +35,18 @@ class BufferWithExtendableBuffer {
public:
static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
- BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
+ BufferWithExtendableBuffer(const ReadWriteByteArrayView originalBuffer,
const int maxAdditionalBufferSize)
- : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
+ : mOriginalBuffer(originalBuffer), mAdditionalBuffer(), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
// Without original buffer.
BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
- : mOriginalBuffer(0), mOriginalBufferSize(0),
- mAdditionalBuffer(0), mUsedAdditionalBufferSize(0),
+ : mOriginalBuffer(), mAdditionalBuffer(), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
- return mOriginalBufferSize + mUsedAdditionalBufferSize;
+ return mOriginalBuffer.size() + mUsedAdditionalBufferSize;
}
AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
@@ -58,16 +57,16 @@ class BufferWithExtendableBuffer {
* For reading.
*/
AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
- return position >= mOriginalBufferSize;
+ return position >= static_cast<int>(mOriginalBuffer.size());
}
// TODO: Resolve the issue that the address can be changed when the vector is resized.
// CAVEAT!: Be careful about array out of bound access with buffers
AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
if (usesAdditionalBuffer) {
- return &mAdditionalBuffer[0];
+ return mAdditionalBuffer.data();
} else {
- return mOriginalBuffer;
+ return mOriginalBuffer.data();
}
}
@@ -79,7 +78,7 @@ class BufferWithExtendableBuffer {
int *const outCodePoints, int *outCodePointCount, int *const pos) const;
AK_FORCE_INLINE int getOriginalBufferSize() const {
- return mOriginalBufferSize;
+ return mOriginalBuffer.size();
}
AK_FORCE_INLINE bool isNearSizeLimit() const {
@@ -110,8 +109,7 @@ class BufferWithExtendableBuffer {
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
- uint8_t *const mOriginalBuffer;
- const int mOriginalBufferSize;
+ const ReadWriteByteArrayView mOriginalBuffer;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
const size_t mMaxAdditionalBufferSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 8460087ab..e25310373 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -21,6 +21,7 @@
#include <memory>
#include "defines.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -39,12 +40,12 @@ class MmappedBuffer {
~MmappedBuffer();
- AK_FORCE_INLINE uint8_t *getBuffer() const {
- return mBuffer;
+ ReadWriteByteArrayView getReadWriteByteArrayView() const {
+ return mByteArrayView;
}
- AK_FORCE_INLINE int getBufferSize() const {
- return mBufferSize;
+ ReadOnlyByteArrayView getReadOnlyByteArrayView() const {
+ return mByteArrayView.getReadOnlyView();
}
AK_FORCE_INLINE bool isUpdatable() const {
@@ -55,18 +56,17 @@ class MmappedBuffer {
AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize,
void *const mmappedBuffer, const int alignedSize, const int mmapFd,
const bool isUpdatable)
- : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
+ : mByteArrayView(buffer, bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
// Empty file. We have to handle an empty file as a valid part of a dictionary.
AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
- : mBuffer(nullptr), mBufferSize(0), mMmappedBuffer(nullptr), mAlignedSize(0),
+ : mByteArrayView(), mMmappedBuffer(nullptr), mAlignedSize(0),
mMmapFd(0), mIsUpdatable(isUpdatable) {}
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
- uint8_t *const mBuffer;
- const int mBufferSize;
+ const ReadWriteByteArrayView mByteArrayView;
void *const mMmappedBuffer;
const int mAlignedSize;
const int mMmapFd;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
index 2904b1e77..407b8efd0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
@@ -43,9 +43,8 @@ TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
}
-TrieMap::TrieMap(uint8_t *const buffer, const int bufferSize)
- : mBuffer(buffer, bufferSize,
- BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
+ : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
void TrieMap::dump(const int from, const int to) const {
AKLOGI("BufSize: %d", mBuffer.getTailPosition());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
index 8b33346e6..3e5c4010c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
@@ -24,6 +24,7 @@
#include "defines.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "utils/byte_array_view.h"
namespace latinime {
@@ -161,13 +162,17 @@ class TrieMap {
TrieMap();
// Construct TrieMap using existing data in the memory region written by save().
- TrieMap(uint8_t *const buffer, const int bufferSize);
+ TrieMap(const ReadWriteByteArrayView buffer);
void dump(const int from = 0, const int to = 0) const;
bool isNearSizeLimit() const {
return mBuffer.isNearSizeLimit();
}
+ int getRootBitmapEntryIndex() const {
+ return ROOT_BITMAP_ENTRY_INDEX;
+ }
+
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
int getNextLevelBitmapEntryIndex(const int key) {
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h
index d13999c16..2c97c6d58 100644
--- a/native/jni/src/utils/byte_array_view.h
+++ b/native/jni/src/utils/byte_array_view.h
@@ -71,6 +71,11 @@ class ReadWriteByteArrayView {
return ReadOnlyByteArrayView(mPtr, mSize);
}
+ ReadWriteByteArrayView subView(const size_t start, const size_t n) const {
+ ASSERT(start + n <= mSize);
+ return ReadWriteByteArrayView(mPtr + start, n);
+ }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h
index 3ff01f5d0..c1ddc9812 100644
--- a/native/jni/src/utils/int_array_view.h
+++ b/native/jni/src/utils/int_array_view.h
@@ -56,11 +56,25 @@ class IntArrayView {
explicit IntArrayView(const std::vector<int> &vector)
: mPtr(vector.data()), mSize(vector.size()) {}
+ template <int N>
+ AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
+ return IntArrayView(array, N);
+ }
+
+ // Returns a view that points one int object. Does not take ownership of the given object.
+ AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
+ return IntArrayView(object, 1);
+ }
+
AK_FORCE_INLINE int operator[](const size_t index) const {
ASSERT(index < mSize);
return mPtr[index];
}
+ AK_FORCE_INLINE bool empty() const {
+ return size() == 0;
+ }
+
AK_FORCE_INLINE size_t size() const {
return mSize;
}
@@ -69,6 +83,14 @@ class IntArrayView {
return mPtr;
}
+ AK_FORCE_INLINE const int *begin() const {
+ return mPtr;
+ }
+
+ AK_FORCE_INLINE const int *end() const {
+ return mPtr + mSize;
+ }
+
private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
@@ -76,5 +98,8 @@ class IntArrayView {
const size_t mSize;
};
+using WordIdArrayView = IntArrayView;
+using PtNodePosArrayView = IntArrayView;
+
} // namespace latinime
#endif // LATINIME_MEMORY_VIEW_H
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
new file mode 100644
index 000000000..6eef2040b
--- /dev/null
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
+
+#include <gtest/gtest.h>
+
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace {
+
+TEST(LanguageModelDictContentTest, TestUnigramProbability) {
+ LanguageModelDictContent LanguageModelDictContent(false /* useHistoricalInfo */);
+
+ const int flag = 0xFF;
+ const int probability = 10;
+ const int wordId = 100;
+ const ProbabilityEntry probabilityEntry(flag, probability);
+ LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
+ const ProbabilityEntry entry =
+ LanguageModelDictContent.getProbabilityEntry(wordId);
+ EXPECT_EQ(flag, entry.getFlags());
+ EXPECT_EQ(probability, entry.getProbability());
+}
+
+TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
+ LanguageModelDictContent LanguageModelDictContent(true /* useHistoricalInfo */);
+
+ const int flag = 0xF0;
+ const int timestamp = 0x3FFFFFFF;
+ const int level = 3;
+ const int count = 10;
+ const int wordId = 100;
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ const ProbabilityEntry probabilityEntry(flag, NOT_A_PROBABILITY, &historicalInfo);
+ LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
+ const ProbabilityEntry entry = LanguageModelDictContent.getProbabilityEntry(wordId);
+ EXPECT_EQ(flag, entry.getFlags());
+ EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp());
+ EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
+ EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());
+}
+
+} // namespace
+} // namespace latinime
diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
new file mode 100644
index 000000000..db94550ef
--- /dev/null
+++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+
+#include <gtest/gtest.h>
+
+#include "defines.h"
+
+namespace latinime {
+namespace {
+
+TEST(ProbabilityEntryTest, TestEncodeDecode) {
+ const int flag = 0xFF;
+ const int probability = 10;
+
+ const ProbabilityEntry entry(flag, probability);
+ const uint64_t encodedEntry = entry.encode(false /* hasHistoricalInfo */);
+ const ProbabilityEntry decodedEntry =
+ ProbabilityEntry::decode(encodedEntry, false /* hasHistoricalInfo */);
+ EXPECT_EQ(0xFF0Aull, encodedEntry);
+ EXPECT_EQ(flag, decodedEntry.getFlags());
+ EXPECT_EQ(probability, decodedEntry.getProbability());
+}
+
+TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) {
+ const int flag = 0xF0;
+ const int timestamp = 0x3FFFFFFF;
+ const int level = 3;
+ const int count = 10;
+
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ const ProbabilityEntry entry(flag, NOT_A_PROBABILITY, &historicalInfo);
+
+ const uint64_t encodedEntry = entry.encode(true /* hasHistoricalInfo */);
+ EXPECT_EQ(0xF03FFFFFFF030Aull, encodedEntry);
+ const ProbabilityEntry decodedEntry =
+ ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
+
+ EXPECT_EQ(flag, decodedEntry.getFlags());
+ EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimeStamp());
+ EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
+ EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
+}
+
+} // namespace
+} // namespace latinime
diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp
index 9aa8cdc8c..bd843ab02 100644
--- a/native/jni/tests/utils/int_array_view_test.cpp
+++ b/native/jni/tests/utils/int_array_view_test.cpp
@@ -23,16 +23,39 @@
namespace latinime {
namespace {
-TEST(MemoryViewTest, TestAccess) {
- static const int DATA_SIZE = 10000;
-
- std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
+TEST(IntArrayViewTest, TestAccess) {
+ const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
IntArrayView intArrayView(intVector);
EXPECT_EQ(intVector.size(), intArrayView.size());
- for (int i = 0; i < DATA_SIZE; ++i) {
+ for (int i = 0; i < static_cast<int>(intVector.size()); ++i) {
EXPECT_EQ(intVector[i], intArrayView[i]);
}
}
+TEST(IntArrayViewTest, TestIteration) {
+ const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
+ IntArrayView intArrayView(intVector);
+ size_t expectedIndex = 0;
+ for (const int element : intArrayView) {
+ EXPECT_EQ(intVector[expectedIndex], element);
+ ++expectedIndex;
+ }
+ EXPECT_EQ(expectedIndex, intArrayView.size());
+}
+
+TEST(IntArrayViewTest, TestConstructFromArray) {
+ const size_t ARRAY_SIZE = 100;
+ int intArray[ARRAY_SIZE];
+ const auto intArrayView = IntArrayView::fromFixedSizeArray(intArray);
+ EXPECT_EQ(ARRAY_SIZE, intArrayView.size());
+}
+
+TEST(IntArrayViewTest, TestConstructFromObject) {
+ const int object = 10;
+ const auto intArrayView = IntArrayView::fromObject(&object);
+ EXPECT_EQ(1, intArrayView.size());
+ EXPECT_EQ(object, intArrayView[0]);
+}
+
} // namespace
} // namespace latinime