diff options
Diffstat (limited to 'native/jni/src')
41 files changed, 497 insertions, 481 deletions
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 7e3bf3ff6..e91f07682 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -20,14 +20,12 @@ #include <memory> #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/property/word_property.h" namespace latinime { class DicNode; class DicNodeVector; -class DictionaryBigramsStructurePolicy; class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; class NgramListener; @@ -67,8 +65,6 @@ class DictionaryStructureWithBufferPolicy { virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0; - virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0; - virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h index 6433650b0..49f446814 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/single_dict_content.h @@ -30,6 +30,7 @@ #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" +#include "utils/byte_array_view.h" namespace latinime { namespace backward { @@ -40,8 +41,9 @@ class SingleDictContent : public DictContent { SingleDictContent(const char *const dictPath, const char *const contentFileName, const bool isUpdatable) : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)), - mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr, - mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0, + mExpandableContentBuffer( + mMmappedBuffer ? mMmappedBuffer->getReadWriteByteArrayView() : + ReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mIsValid(mMmappedBuffer) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h index c7233edd3..3c626df11 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/content/sparse_table_dict_content.h @@ -31,6 +31,7 @@ #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/sparse_table.h" +#include "utils/byte_array_view.h" namespace latinime { namespace backward { @@ -50,15 +51,16 @@ class SparseTableDictContent : public DictContent { mContentBuffer( MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)), mExpandableLookupTableBuffer( - mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr, - mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0, + mLookupTableBuffer ? mLookupTableBuffer->getReadWriteByteArrayView() : + ReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mExpandableAddressTableBuffer( - mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr, - mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0, + mAddressTableBuffer ? mAddressTableBuffer->getReadWriteByteArrayView() : + ReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr, - mContentBuffer ? mContentBuffer->getBufferSize() : 0, + mExpandableContentBuffer( + mContentBuffer ? mContentBuffer->getReadWriteByteArrayView() : + ReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, sparseTableBlockSize, sparseTableDataSize), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp index 93f192976..3dfbd1c94 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.cpp @@ -30,6 +30,7 @@ #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h" +#include "utils/byte_array_view.h" namespace latinime { namespace backward { @@ -130,12 +131,12 @@ Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath, : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(MmappedBuffer::openBuffer(dictPath, Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)), - mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion), - mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr, - mHeaderPolicy.getSize(), + mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion), + mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr, - mDictBuffer ? mDictBuffer->getBufferSize() : 0, + mExpandableTrieBuffer( + mDictBuffer ? mDictBuffer->getReadWriteByteArrayView() : + ReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mTerminalPositionLookupTable(dictPath, isUpdatable), mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp index 4220a9561..278f2b199 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.cpp @@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( &probabilityEntryToWrite); } -bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( - const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, - const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { - if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) { +bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { + if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) { AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); return false; } - if (!sourcePtNodeParams->hasBigrams()) { + const int ptNodePos = + mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]); + const PtNodeParams sourcePtNodeParams = + mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); + if (!sourcePtNodeParams.hasBigrams()) { // Update has bigrams flag. - return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(), - sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(), - sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(), + return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(), + sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(), + sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(), true /* hasBigrams */, - sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); + sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */); } return true; } -bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( - const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { - return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId()); +bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds, + const int wordId) { + return mBigramPolicy->removeEntry(prevWordIds[0], wordId); } bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h index 08226ea26..d49d9a666 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h @@ -29,6 +29,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h" +#include "utils/int_array_view.h" namespace latinime { namespace backward { @@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const PtNodeArrayReader *const ptNodeArrayReader, Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy), - mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy), - mShortcutPolicy(shortcutPolicy) {} + mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader), + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} virtual ~Ver4PatriciaTrieNodeWriter() {} @@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); - virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, - bool *const outAddedNewBigram); + virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); - virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam); + virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); virtual bool updateAllBigramEntriesAndDeleteUselessEntries( const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount); @@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { BufferWithExtendableBuffer *const mTrieBuffer; Ver4DictBuffers *const mBuffers; const HeaderPolicy *const mHeaderPolicy; + const PtNodeReader *const mPtNodeReader; DynamicPtReadingHelper mReadingHelper; Ver4BigramListPolicy *const mBigramPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 994c42505..1296b8acd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -142,8 +142,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN return NOT_A_PROBABILITY; } if (prevWordsPtNodePos) { - BinaryDictionaryBigramsIterator bigramsIt = - getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]); + const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == ptNodePos @@ -161,7 +161,8 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod if (!prevWordsPtNodePos) { return; } - BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]); + const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); @@ -180,12 +181,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con ptNodeParams.getTerminalId()); } -BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( - const int ptNodePos) const { - const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); - return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); -} - int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; @@ -314,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI return false; } bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, - &addedNewBigram)) { + if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos), + word1Pos, bigramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -355,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor if (wordPos == NOT_A_DICT_POS) { return false; } - if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { + if (mUpdatingHelper.removeNgramEntry( + PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) { mBigramCount--; return true; } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index ff69de7c0..9e989b268 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -97,8 +97,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; - const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return mHeaderPolicy; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index e4b5fa267..e4ea3da16 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -31,6 +31,7 @@ #include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" +#include "utils/byte_array_view.h" namespace latinime { @@ -110,7 +111,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str return nullptr; } const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion( - mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize()); + mmappedBuffer->getReadOnlyByteArrayView().data(), + mmappedBuffer->getReadOnlyByteArrayView().size()); switch (formatVersion) { case FormatUtils::VERSION_2: AKLOGE("Given path is a directory but the format is version 2. path: %s", path); @@ -172,8 +174,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str if (!mmappedBuffer) { return nullptr; } - switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(), - mmappedBuffer->getBufferSize())) { + switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView().data(), + mmappedBuffer->getReadOnlyByteArrayView().size())) { case FormatUtils::VERSION_2: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h index 2e05bf397..b7262581a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h @@ -26,7 +26,6 @@ namespace latinime { -class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; class PtNodeArrayReader; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp index f31c914d2..3c62e2e56 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp @@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord( unigramProperty, &pos); } -bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, - const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { - const PtNodeParams sourcePtNodeParams( - mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos)); - const PtNodeParams targetPtNodeParams( - mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos)); - return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, - bigramProperty, outAddedNewBigram); +bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, + const int wordPos, const BigramProperty *const bigramProperty, + bool *const outAddedNewEntry) { + if (prevWordsPtNodePos.empty()) { + return false; + } + ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM); + int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { + prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos( + prevWordsPtNodePos[i]).getTerminalId(); + } + const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); + const int wordId = + mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); + return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry); } -// Remove a bigram relation from word0Pos to word1Pos. -bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { - const PtNodeParams sourcePtNodeParams( - mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos)); - const PtNodeParams targetPtNodeParams( - mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos)); - return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams); +bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, + const int wordPos) { + if (prevWordsPtNodePos.empty()) { + return false; + } + ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM); + int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { + prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos( + prevWordsPtNodePos[i]).getTerminalId(); + } + const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); + const int wordId = + mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); + return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId); } bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h index f10d15a9b..97c05c1ea 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "utils/int_array_view.h" namespace latinime { @@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper { const int *const wordCodePoints, const int codePointCount, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); - // Add a bigram relation from word0Pos to word1Pos. - bool addBigramWords(const int word0Pos, const int word1Pos, - const BigramProperty *const bigramProperty, bool *const outAddedNewBigram); + // Add an n-gram entry. + bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); - // Remove a bigram relation from word0Pos to word1Pos. - bool removeBigramWords(const int word0Pos, const int word1Pos); + // Remove an n-gram entry. + bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos); // Add a shortcut target. bool addShortcutTarget(const int wordPos, const int *const targetCodePoints, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h index a8029f73f..955d779ac 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "utils/int_array_view.h" namespace latinime { @@ -70,12 +71,10 @@ class PtNodeWriter { virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; - virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, - bool *const outAddedNewBigram) = 0; + virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0; - virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam) = 0; + virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0; virtual bool updateAllBigramEntriesAndDeleteUselessEntries( const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 53415aeb6..ea32eb2a9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -311,8 +311,8 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodeP return NOT_A_PROBABILITY; } if (prevWordsPtNodePos) { - BinaryDictionaryBigramsIterator bigramsIt = - getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]); + const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == ptNodePos @@ -330,7 +330,8 @@ void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos if (!prevWordsPtNodePos) { return; } - BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]); + const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); @@ -344,12 +345,6 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos(); } -BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode( - const int ptNodePos) const { - const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); - return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition); -} - int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 07cb72b23..70351d147 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -29,6 +29,7 @@ #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" +#include "utils/byte_array_view.h" namespace latinime { @@ -39,9 +40,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) : mMmappedBuffer(std::move(mmappedBuffer)), - mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2), - mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()), - mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()), + mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(), + FormatUtils::VERSION_2), + mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data() + + mHeaderPolicy.getSize()), + mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size() + - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeArrayReader(mDictRoot, mDictBufferSize), @@ -70,8 +74,6 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; - const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return &mHeaderPolicy; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h deleted file mode 100644 index c264aeac4..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dict_content.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_DICT_CONTENT_H -#define LATINIME_DICT_CONTENT_H - -#include "defines.h" - -namespace latinime { - -class DictContent { - public: - virtual ~DictContent() {} - - protected: - DictContent() {} - - private: - DISALLOW_COPY_AND_ASSIGN(DictContent); -}; -} // namespace latinime -#endif /* LATINIME_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp new file mode 100644 index 000000000..5dc91ba10 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" + +namespace latinime { + +bool LanguageModelDictContent::save(FILE *const file) const { + return mTrieMap.save(file); +} + +bool LanguageModelDictContent::runGC( + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const LanguageModelDictContent *const originalContent, + int *const outNgramCount) { + return runGCInner(terminalIdMap, originalContent->mTrieMap.getEntriesInRootLevel(), + 0 /* nextLevelBitmapEntryIndex */, outNgramCount); +} + +ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry( + const WordIdArrayView prevWordIds, const int wordId) const { + const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds); + if (bitmapEntryIndex == TrieMap::INVALID_INDEX) { + return ProbabilityEntry(); + } + const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex); + if (!result.mIsValid) { + // Not found. + return ProbabilityEntry(); + } + return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo); +} + +bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds, + const int terminalId, const ProbabilityEntry *const probabilityEntry) { + const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds); + if (bitmapEntryIndex == TrieMap::INVALID_INDEX) { + return false; + } + return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex); +} + +bool LanguageModelDictContent::runGCInner( + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const TrieMap::TrieMapRange trieMapRange, + const int nextLevelBitmapEntryIndex, int *const outNgramCount) { + for (auto &entry : trieMapRange) { + const auto it = terminalIdMap->find(entry.key()); + if (it == terminalIdMap->end() || it->second == Ver4DictConstants::NOT_A_TERMINAL_ID) { + // The word has been removed. + continue; + } + if (!mTrieMap.put(it->second, entry.value(), nextLevelBitmapEntryIndex)) { + return false; + } + if (outNgramCount) { + *outNgramCount += 1; + } + if (entry.hasNextLevelMap()) { + if (!runGCInner(terminalIdMap, entry.getEntriesInNextLevel(), + mTrieMap.getNextLevelBitmapEntryIndex(it->second, nextLevelBitmapEntryIndex), + outNgramCount)) { + return false; + } + } + } + return true; +} + +int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const { + int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex(); + for (const int wordId : prevWordIds) { + const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex); + if (!result.mIsValid) { + return TrieMap::INVALID_INDEX; + } + bitmapEntryIndex = result.mNextLevelBitmapEntryIndex; + } + return bitmapEntryIndex; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h new file mode 100644 index 000000000..18f2e0170 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H +#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H + +#include <cstdio> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/trie_map.h" +#include "utils/byte_array_view.h" +#include "utils/int_array_view.h" + +namespace latinime { + +/** + * Class representing language model. + * + * This class provides methods to get and store unigram/n-gram probability information and flags. + */ +class LanguageModelDictContent { + public: + LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer, + const bool hasHistoricalInfo) + : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {} + + explicit LanguageModelDictContent(const bool hasHistoricalInfo) + : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {} + + bool isNearSizeLimit() const { + return mTrieMap.isNearSizeLimit(); + } + + bool save(FILE *const file) const; + + bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const LanguageModelDictContent *const originalContent, + int *const outNgramCount); + + ProbabilityEntry getProbabilityEntry(const int wordId) const { + return getNgramProbabilityEntry(WordIdArrayView(), wordId); + } + + bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) { + return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry); + } + + ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds, + const int wordId) const; + + bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId, + const ProbabilityEntry *const probabilityEntry); + + private: + DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent); + + TrieMap mTrieMap; + const bool mHasHistoricalInfo; + + bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex, + int *const outNgramCount); + + int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const; +}; +} // namespace latinime +#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp deleted file mode 100644 index 2425b3b2f..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" - -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const { - if (terminalId < 0 || terminalId >= mSize) { - // This method can be called with invalid terminal id during GC. - return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY); - } - const BufferWithExtendableBuffer *const buffer = getBuffer(); - int entryPos = getEntryPos(terminalId); - const int flags = buffer->readUintAndAdvancePosition( - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos); - const int probability = buffer->readUintAndAdvancePosition( - Ver4DictConstants::PROBABILITY_SIZE, &entryPos); - if (mHasHistoricalInfo) { - const int timestamp = buffer->readUintAndAdvancePosition( - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos); - const int level = buffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos); - const int count = buffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos); - const HistoricalInfo historicalInfo(timestamp, level, count); - return ProbabilityEntry(flags, probability, &historicalInfo); - } else { - return ProbabilityEntry(flags, probability); - } -} - -bool ProbabilityDictContent::setProbabilityEntry(const int terminalId, - const ProbabilityEntry *const probabilityEntry) { - if (terminalId < 0) { - return false; - } - const int entryPos = getEntryPos(terminalId); - if (terminalId >= mSize) { - ProbabilityEntry dummyEntry; - // Write new entry. - int writingPos = getBuffer()->getTailPosition(); - while (writingPos <= entryPos) { - // Fulfilling with dummy entries until writingPos. - if (!writeEntry(&dummyEntry, writingPos)) { - AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize); - return false; - } - writingPos += getEntrySize(); - mSize++; - } - } - return writeEntry(probabilityEntry, entryPos); -} - -bool ProbabilityDictContent::flushToFile(FILE *const file) const { - if (getEntryPos(mSize) < getBuffer()->getTailPosition()) { - ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo); - for (int i = 0; i < mSize; ++i) { - const ProbabilityEntry probabilityEntry = getProbabilityEntry(i); - if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) { - AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i); - return false; - } - } - return probabilityDictContentToWrite.flush(file); - } else { - return flush(file); - } -} - -bool ProbabilityDictContent::runGC( - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ProbabilityDictContent *const originalProbabilityDictContent) { - mSize = 0; - for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); - it != terminalIdMap->end(); ++it) { - const ProbabilityEntry probabilityEntry = - originalProbabilityDictContent->getProbabilityEntry(it->first); - if (!setProbabilityEntry(it->second, &probabilityEntry)) { - AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second); - return false; - } - mSize++; - } - return true; -} - -int ProbabilityDictContent::getEntrySize() const { - if (mHasHistoricalInfo) { - return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE - + Ver4DictConstants::TIME_STAMP_FIELD_SIZE - + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE - + Ver4DictConstants::WORD_COUNT_FIELD_SIZE; - } else { - return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE; - } -} - -int ProbabilityDictContent::getEntryPos(const int terminalId) const { - return terminalId * getEntrySize(); -} - -bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry, - const int entryPos) { - BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer(); - int writingPos = entryPos; - if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(), - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) { - AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(), - Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) { - AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos); - return false; - } - if (mHasHistoricalInfo) { - const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo(); - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(), - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(), - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos); - return false; - } - } - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h deleted file mode 100644 index 80e992c1c..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H -#define LATINIME_PROBABILITY_DICT_CONTENT_H - -#include <cstdint> -#include <cstdio> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -class ProbabilityEntry; - -class ProbabilityDictContent : public SingleDictContent { - public: - ProbabilityDictContent(uint8_t *const buffer, const int bufferSize, - const bool hasHistoricalInfo) - : SingleDictContent(buffer, bufferSize), - mHasHistoricalInfo(hasHistoricalInfo), - mSize(getBuffer()->getTailPosition() / getEntrySize()) {} - - ProbabilityDictContent(const bool hasHistoricalInfo) - : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {} - - const ProbabilityEntry getProbabilityEntry(const int terminalId) const; - - bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry); - - bool flushToFile(FILE *const file) const; - - bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ProbabilityDictContent *const originalProbabilityDictContent); - - private: - DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); - - int getEntrySize() const; - - int getEntryPos(const int terminalId) const; - - bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos); - - bool mHasHistoricalInfo; - int mSize; -}; -} // namespace latinime -#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h index 36ba82be1..feff6b57f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h @@ -17,6 +17,9 @@ #ifndef LATINIME_PROBABILITY_ENTRY_H #define LATINIME_PROBABILITY_ENTRY_H +#include <climits> +#include <cstdint> + #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/historical_info.h" @@ -67,6 +70,50 @@ class ProbabilityEntry { return &mHistoricalInfo; } + uint64_t encode(const bool hasHistoricalInfo) const { + uint64_t encodedEntry = static_cast<uint64_t>(mFlags); + if (hasHistoricalInfo) { + encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT)) + ^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp()); + encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT)) + ^ static_cast<uint64_t>(mHistoricalInfo.getLevel()); + encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) + ^ static_cast<uint64_t>(mHistoricalInfo.getCount()); + } else { + encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT)) + ^ static_cast<uint64_t>(mProbability); + } + return encodedEntry; + } + + static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) { + if (hasHistoricalInfo) { + const int flags = readFromEncodedEntry(encodedEntry, + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, + Ver4DictConstants::TIME_STAMP_FIELD_SIZE + + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE + + Ver4DictConstants::WORD_COUNT_FIELD_SIZE); + const int timestamp = readFromEncodedEntry(encodedEntry, + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE + + Ver4DictConstants::WORD_COUNT_FIELD_SIZE); + const int level = readFromEncodedEntry(encodedEntry, + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, + Ver4DictConstants::WORD_COUNT_FIELD_SIZE); + const int count = readFromEncodedEntry(encodedEntry, + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */); + const HistoricalInfo historicalInfo(timestamp, level, count); + return ProbabilityEntry(flags, NOT_A_PROBABILITY, &historicalInfo); + } else { + const int flags = readFromEncodedEntry(encodedEntry, + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, + Ver4DictConstants::PROBABILITY_SIZE); + const int probability = readFromEncodedEntry(encodedEntry, + Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */); + return ProbabilityEntry(flags, probability); + } + } + private: // Copy constructor is public to use this class as a type of return value. DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry); @@ -74,6 +121,11 @@ class ProbabilityEntry { const int mFlags; const int mProbability; const HistoricalInfo mHistoricalInfo; + + static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) { + return static_cast<int>( + (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1)); + } }; } // namespace latinime #endif /* LATINIME_PROBABILITY_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h index 69a11425f..921774181 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h @@ -21,17 +21,17 @@ #include <cstdio> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "utils/byte_array_view.h" namespace latinime { -class SingleDictContent : public DictContent { +class SingleDictContent { public: SingleDictContent(uint8_t *const buffer, const int bufferSize) - : mExpandableContentBuffer(buffer, bufferSize, + : mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} SingleDictContent() diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index cdf870bd2..c98dd11fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -21,26 +21,29 @@ #include <cstdio> #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/sparse_table.h" +#include "utils/byte_array_view.h" namespace latinime { // TODO: Support multiple contents. -class SparseTableDictContent : public DictContent { +class SparseTableDictContent { public: AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes, const int sparseTableBlockSize, const int sparseTableDataSize) - : mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX], - bufferSizes[LOOKUP_TABLE_BUFFER_INDEX], + : mExpandableLookupTableBuffer( + ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX], + bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX], - bufferSizes[ADDRESS_TABLE_BUFFER_INDEX], + mExpandableAddressTableBuffer( + ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX], + bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX], - bufferSizes[CONTENT_BUFFER_INDEX], + mExpandableContentBuffer( + ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX], + bufferSizes[CONTENT_BUFFER_INDEX]), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, sparseTableBlockSize, sparseTableDataSize) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index ac9540309..3c8008dc4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -26,6 +26,7 @@ #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h" +#include "utils/byte_array_view.h" namespace latinime { @@ -46,14 +47,16 @@ namespace latinime { } std::vector<uint8_t *> buffers; std::vector<int> bufferSizes; - uint8_t *const buffer = bodyBuffer->getBuffer(); + const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView(); int position = 0; - while (position < bodyBuffer->getBufferSize()) { - const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(buffer, &position); - buffers.push_back(buffer + position); - bufferSizes.push_back(bufferSize); + while (position < static_cast<int>(buffer.size())) { + const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition( + buffer.data(), &position); + const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize); + buffers.push_back(subBuffer.data()); + bufferSizes.push_back(subBuffer.size()); position += bufferSize; - if (bufferSize < 0 || position < 0 || position > bodyBuffer->getBufferSize()) { + if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) { AKLOGE("The dict body file is corrupted."); return Ver4DictBuffersPtr(nullptr); } @@ -154,9 +157,9 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const { AKLOGE("Terminal position lookup table cannot be written."); return false; } - // Write probability dict content. - if (!mProbabilityDictContent.flushToFile(file)) { - AKLOGE("Probability dict content cannot be written."); + // Write language model content. + if (!mLanguageModelDictContent.save(file)) { + AKLOGE("Language model dict content cannot be written."); return false; } // Write bigram dict content. @@ -177,20 +180,21 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer, const FormatUtils::FORMAT_VERSION formatVersion, const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes) : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)), - mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion), - mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr, - mHeaderPolicy.getSize(), + mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion), + mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX], - contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX], + mExpandableTrieBuffer( + ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX], + contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mTerminalPositionLookupTable( contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX], contentBufferSizes[ Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]), - mProbabilityDictContent( - contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX], - contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX], + mLanguageModelDictContent( + ReadWriteByteArrayView( + contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX], + contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]), mHeaderPolicy.hasHistoricalInfoOfWords()), mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX], &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX], @@ -203,7 +207,7 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy), mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(), - mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()), + mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()), mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), mIsUpdatable(true) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 433411cb8..68027dcb8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -23,7 +23,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -52,7 +52,7 @@ class Ver4DictBuffers { AK_FORCE_INLINE bool isNearSizeLimit() const { return mExpandableTrieBuffer.isNearSizeLimit() || mTerminalPositionLookupTable.isNearSizeLimit() - || mProbabilityDictContent.isNearSizeLimit() + || mLanguageModelDictContent.isNearSizeLimit() || mBigramDictContent.isNearSizeLimit() || mShortcutDictContent.isNearSizeLimit(); } @@ -81,12 +81,12 @@ class Ver4DictBuffers { return &mTerminalPositionLookupTable; } - AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() { - return &mProbabilityDictContent; + AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() { + return &mLanguageModelDictContent; } - AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { - return &mProbabilityDictContent; + AK_FORCE_INLINE const LanguageModelDictContent *getLanguageModelDictContent() const { + return &mLanguageModelDictContent; } AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() { @@ -135,7 +135,7 @@ class Ver4DictBuffers { BufferWithExtendableBuffer mExpandableHeaderBuffer; BufferWithExtendableBuffer mExpandableTrieBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; - ProbabilityDictContent mProbabilityDictContent; + LanguageModelDictContent mLanguageModelDictContent; BigramDictContent mBigramDictContent; ShortcutDictContent mShortcutDictContent; const int mIsUpdatable; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index d45dfe377..93d4e562d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -27,18 +27,20 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024; // limited to 1MB to prevent from inefficient traversing. const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; -// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability. +// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable. +// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model. // NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut. const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE = - NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3 + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2 + + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2; const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0; const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX = TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; -const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX = +const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX = TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX = - PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; + LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT; const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX = BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT; @@ -73,5 +75,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1; const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3; +const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index e8f6739ba..6950ca70f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -35,7 +35,7 @@ class Ver4DictConstants { static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE; static const int TRIE_BUFFER_INDEX; static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX; - static const int PROBABILITY_BUFFER_INDEX; + static const int LANGUAGE_MODEL_BUFFER_INDEX; static const int BIGRAM_BUFFERS_INDEX; static const int SHORTCUT_BUFFERS_INDEX; @@ -71,6 +71,7 @@ class Ver4DictConstants { static const size_t NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; static const size_t NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT; + static const size_t NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT; }; } // namespace latinime #endif /* LATINIME_VER4_DICT_CONSTANTS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp index 0a435e91c..731092efd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp @@ -18,7 +18,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -61,8 +61,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce terminalIdFieldPos += mBuffer->getOriginalBufferSize(); } terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); + // TODO: Quit reading probability here. const ProbabilityEntry probabilityEntry = - mProbabilityDictContent->getProbabilityEntry(terminalId); + mLanguageModelDictContent->getProbabilityEntry(terminalId); if (probabilityEntry.hasHistoricalInfo()) { probability = ForgettingCurveUtils::decodeProbability( probabilityEntry.getHistoricalInfo(), mHeaderPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h index 22ed4a6c0..a91ad5728 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h @@ -25,18 +25,18 @@ namespace latinime { class BufferWithExtendableBuffer; class HeaderPolicy; -class ProbabilityDictContent; +class LanguageModelDictContent; /* * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved - * node and reads node attributes including probability form probabilityBuffer. + * node and reads node attributes including probability form language model. */ class Ver4PatriciaTrieNodeReader : public PtNodeReader { public: Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, - const ProbabilityDictContent *const probabilityDictContent, + const LanguageModelDictContent *const languageModelDictContent, const HeaderPolicy *const headerPolicy) - : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent), + : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent), mHeaderPolicy(headerPolicy) {} ~Ver4PatriciaTrieNodeReader() {} @@ -50,7 +50,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader { DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); const BufferWithExtendableBuffer *const mBuffer; - const ProbabilityDictContent *const mProbabilityDictContent; + const LanguageModelDictContent *const mLanguageModelDictContent; const HeaderPolicy *const mHeaderPolicy; const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 3d8da9173..857222f5d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -143,11 +143,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty( return false; } const ProbabilityEntry originalProbabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( + mBuffers->getLanguageModelDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, unigramProperty); - return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( + return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); } @@ -158,14 +158,14 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA return false; } const ProbabilityEntry originalProbabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( + mBuffers->getLanguageModelDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); if (originalProbabilityEntry.hasHistoricalInfo()) { const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy); const ProbabilityEntry probabilityEntry = originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo); - if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( + if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { AKLOGE("Cannot write updated probability entry. terminalId: %d", toBeUpdatedPtNodeParams->getTerminalId()); @@ -218,26 +218,23 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( ProbabilityEntry newProbabilityEntry; const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( &newProbabilityEntry, unigramProperty); - return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, - &probabilityEntryToWrite); + return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( + terminalId, &probabilityEntryToWrite); } -bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( - const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, +bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { - if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) { + if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) { AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", - sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); + prevWordIds[0], wordId); return false; } return true; } -bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( - const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { - return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), - targetPtNodeParam->getTerminalId()); +bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds, + const int wordId) { + return mBigramPolicy->removeEntry(prevWordIds[0], wordId); } bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index 162dc9b1d..6703dba04 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); - virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, - bool *const outAddedNewBigram); + virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); - virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, - const PtNodeParams *const targetPtNodeParam); + virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId); virtual bool updateAllBigramEntriesAndDeleteUselessEntries( const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 22f7e1182..723808399 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -132,8 +132,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN return NOT_A_PROBABILITY; } if (prevWordsPtNodePos) { - BinaryDictionaryBigramsIterator bigramsIt = - getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]); + const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == ptNodePos @@ -151,7 +151,8 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNod if (!prevWordsPtNodePos) { return; } - BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]); + const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); while (bigramsIt.hasNext()) { bigramsIt.next(); listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); @@ -170,12 +171,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con ptNodeParams.getTerminalId()); } -BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( - const int ptNodePos) const { - const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); - return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); -} - int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; @@ -297,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, false /* tryLowerCaseSearch */); + const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos); // TODO: Support N-gram. if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { @@ -324,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI if (word1Pos == NOT_A_DICT_POS) { return false; } - bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, - &addedNewBigram)) { - if (addedNewBigram) { + bool addedNewEntry = false; + if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty, + &addedNewEntry)) { + if (addedNewEntry) { mBigramCount++; } return true; @@ -357,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, false /* tryLowerCaseSerch */); + const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos); // TODO: Support N-gram. if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return false; @@ -366,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor if (wordPos == NOT_A_DICT_POS) { return false; } - if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { + if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) { mBigramCount--; return true; } else { @@ -457,7 +454,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code std::vector<int> codePointVector(ptNodeParams.getCodePoints(), ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); const ProbabilityEntry probabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( + mBuffers->getLanguageModelDictContent()->getProbabilityEntry( ptNodeParams.getTerminalId()); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index c5b6a80c0..faad4290d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -46,7 +46,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy), mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()), - mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy), + mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy), mPtNodeArrayReader(mDictBuffer), mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader, &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), @@ -79,8 +79,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; - const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return mHeaderPolicy; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 0e658f8e3..4220312e0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -75,7 +75,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, int *const outBigramCount) { Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), - mBuffers->getProbabilityDictContent(), headerPolicy); + mBuffers->getLanguageModelDictContent(), headerPolicy); Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), mBuffers->getTerminalPositionLookupTable(), headerPolicy); @@ -138,7 +138,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instances for the GCed dictionary. Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), - buffersToWrite->getProbabilityDictContent(), headerPolicy); + buffersToWrite->getLanguageModelDictContent(), headerPolicy); Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); @@ -154,8 +154,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } // Run GC for probability dict content. - if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, - mBuffers->getProbabilityDictContent())) { + if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap, + mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) { return false; } // Run GC for bigram dict content. @@ -201,7 +201,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( continue; } const ProbabilityEntry probabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); + mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i); const int probability = probabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability( probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 825b72c6a..833063c17 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -25,7 +25,7 @@ const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 12 uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const { const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos); - const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos; + const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBuffer.size() : pos; return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer); } @@ -40,12 +40,12 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC int *const outCodePoints, int *outCodePointCount, int *const pos) const { const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos); if (readingPosIsInAdditionalBuffer) { - *pos -= mOriginalBufferSize; + *pos -= mOriginalBuffer.size(); } *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition( getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos); if (readingPosIsInAdditionalBuffer) { - *pos += mOriginalBufferSize; + *pos += mOriginalBuffer.size(); } } @@ -69,13 +69,14 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data return false; } const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos); - uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer; + uint8_t *const buffer = + usesAdditionalBuffer ? mAdditionalBuffer.data() : mOriginalBuffer.data(); if (usesAdditionalBuffer) { - *pos -= mOriginalBufferSize; + *pos -= mOriginalBuffer.size(); } ByteArrayUtils::writeUintAndAdvancePosition(buffer, data, size, pos); if (usesAdditionalBuffer) { - *pos += mOriginalBufferSize; + *pos += mOriginalBuffer.size(); } return true; } @@ -88,14 +89,15 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co return false; } const bool usesAdditionalBuffer = isInAdditionalBuffer(*pos); - uint8_t *const buffer = usesAdditionalBuffer ? &mAdditionalBuffer[0] : mOriginalBuffer; + uint8_t *const buffer = + usesAdditionalBuffer ? mAdditionalBuffer.data() : mOriginalBuffer.data(); if (usesAdditionalBuffer) { - *pos -= mOriginalBufferSize; + *pos -= mOriginalBuffer.size(); } ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePoints, codePointCount, writesTerminator, pos); if (usesAdditionalBuffer) { - *pos += mOriginalBufferSize; + *pos += mOriginalBuffer.size(); } return true; } @@ -119,7 +121,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int const size_t totalRequiredSize = static_cast<size_t>(pos + size); if (!isInAdditionalBuffer(pos)) { // Here don't need to care about the additional buffer. - if (static_cast<size_t>(mOriginalBufferSize) < totalRequiredSize) { + if (mOriginalBuffer.size() < totalRequiredSize) { // Violate the boundary. return false; } @@ -137,7 +139,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int return false; } const size_t extendSize = totalRequiredSize - - std::min(mAdditionalBuffer.size() + mOriginalBufferSize, totalRequiredSize); + std::min(mAdditionalBuffer.size() + mOriginalBuffer.size(), totalRequiredSize); if (extendSize > 0 && !extendBuffer(extendSize)) { // Failed to extend the buffer. return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 5e1362eee..fad83aa25 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -23,6 +23,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" +#include "utils/byte_array_view.h" namespace latinime { @@ -34,20 +35,18 @@ class BufferWithExtendableBuffer { public: static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE; - BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, + BufferWithExtendableBuffer(const ReadWriteByteArrayView originalBuffer, const int maxAdditionalBufferSize) - : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), - mAdditionalBuffer(0), mUsedAdditionalBufferSize(0), + : mOriginalBuffer(originalBuffer), mAdditionalBuffer(), mUsedAdditionalBufferSize(0), mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} // Without original buffer. BufferWithExtendableBuffer(const int maxAdditionalBufferSize) - : mOriginalBuffer(0), mOriginalBufferSize(0), - mAdditionalBuffer(0), mUsedAdditionalBufferSize(0), + : mOriginalBuffer(), mAdditionalBuffer(), mUsedAdditionalBufferSize(0), mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} AK_FORCE_INLINE int getTailPosition() const { - return mOriginalBufferSize + mUsedAdditionalBufferSize; + return mOriginalBuffer.size() + mUsedAdditionalBufferSize; } AK_FORCE_INLINE int getUsedAdditionalBufferSize() const { @@ -58,16 +57,16 @@ class BufferWithExtendableBuffer { * For reading. */ AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const { - return position >= mOriginalBufferSize; + return position >= static_cast<int>(mOriginalBuffer.size()); } // TODO: Resolve the issue that the address can be changed when the vector is resized. // CAVEAT!: Be careful about array out of bound access with buffers AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const { if (usesAdditionalBuffer) { - return &mAdditionalBuffer[0]; + return mAdditionalBuffer.data(); } else { - return mOriginalBuffer; + return mOriginalBuffer.data(); } } @@ -79,7 +78,7 @@ class BufferWithExtendableBuffer { int *const outCodePoints, int *outCodePointCount, int *const pos) const; AK_FORCE_INLINE int getOriginalBufferSize() const { - return mOriginalBufferSize; + return mOriginalBuffer.size(); } AK_FORCE_INLINE bool isNearSizeLimit() const { @@ -110,8 +109,7 @@ class BufferWithExtendableBuffer { static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; - uint8_t *const mOriginalBuffer; - const int mOriginalBufferSize; + const ReadWriteByteArrayView mOriginalBuffer; std::vector<uint8_t> mAdditionalBuffer; int mUsedAdditionalBufferSize; const size_t mMaxAdditionalBufferSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 8460087ab..e25310373 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -21,6 +21,7 @@ #include <memory> #include "defines.h" +#include "utils/byte_array_view.h" namespace latinime { @@ -39,12 +40,12 @@ class MmappedBuffer { ~MmappedBuffer(); - AK_FORCE_INLINE uint8_t *getBuffer() const { - return mBuffer; + ReadWriteByteArrayView getReadWriteByteArrayView() const { + return mByteArrayView; } - AK_FORCE_INLINE int getBufferSize() const { - return mBufferSize; + ReadOnlyByteArrayView getReadOnlyByteArrayView() const { + return mByteArrayView.getReadOnlyView(); } AK_FORCE_INLINE bool isUpdatable() const { @@ -55,18 +56,17 @@ class MmappedBuffer { AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize, void *const mmappedBuffer, const int alignedSize, const int mmapFd, const bool isUpdatable) - : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer), + : mByteArrayView(buffer, bufferSize), mMmappedBuffer(mmappedBuffer), mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {} // Empty file. We have to handle an empty file as a valid part of a dictionary. AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable) - : mBuffer(nullptr), mBufferSize(0), mMmappedBuffer(nullptr), mAlignedSize(0), + : mByteArrayView(), mMmappedBuffer(nullptr), mAlignedSize(0), mMmapFd(0), mIsUpdatable(isUpdatable) {} DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer); - uint8_t *const mBuffer; - const int mBufferSize; + const ReadWriteByteArrayView mByteArrayView; void *const mMmappedBuffer; const int mAlignedSize; const int mMmapFd; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp index 2904b1e77..407b8efd0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp @@ -43,9 +43,8 @@ TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) { writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX); } -TrieMap::TrieMap(uint8_t *const buffer, const int bufferSize) - : mBuffer(buffer, bufferSize, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} +TrieMap::TrieMap(const ReadWriteByteArrayView buffer) + : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} void TrieMap::dump(const int from, const int to) const { AKLOGI("BufSize: %d", mBuffer.getTailPosition()); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h index 8b33346e6..3e5c4010c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h @@ -24,6 +24,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "utils/byte_array_view.h" namespace latinime { @@ -161,13 +162,17 @@ class TrieMap { TrieMap(); // Construct TrieMap using existing data in the memory region written by save(). - TrieMap(uint8_t *const buffer, const int bufferSize); + TrieMap(const ReadWriteByteArrayView buffer); void dump(const int from = 0, const int to = 0) const; bool isNearSizeLimit() const { return mBuffer.isNearSizeLimit(); } + int getRootBitmapEntryIndex() const { + return ROOT_BITMAP_ENTRY_INDEX; + } + // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. int getNextLevelBitmapEntryIndex(const int key) { return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); diff --git a/native/jni/src/utils/byte_array_view.h b/native/jni/src/utils/byte_array_view.h index d13999c16..2c97c6d58 100644 --- a/native/jni/src/utils/byte_array_view.h +++ b/native/jni/src/utils/byte_array_view.h @@ -71,6 +71,11 @@ class ReadWriteByteArrayView { return ReadOnlyByteArrayView(mPtr, mSize); } + ReadWriteByteArrayView subView(const size_t start, const size_t n) const { + ASSERT(start + n <= mSize); + return ReadWriteByteArrayView(mPtr + start, n); + } + private: DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView); diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index 3ff01f5d0..c1ddc9812 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -56,11 +56,25 @@ class IntArrayView { explicit IntArrayView(const std::vector<int> &vector) : mPtr(vector.data()), mSize(vector.size()) {} + template <int N> + AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) { + return IntArrayView(array, N); + } + + // Returns a view that points one int object. Does not take ownership of the given object. + AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) { + return IntArrayView(object, 1); + } + AK_FORCE_INLINE int operator[](const size_t index) const { ASSERT(index < mSize); return mPtr[index]; } + AK_FORCE_INLINE bool empty() const { + return size() == 0; + } + AK_FORCE_INLINE size_t size() const { return mSize; } @@ -69,6 +83,14 @@ class IntArrayView { return mPtr; } + AK_FORCE_INLINE const int *begin() const { + return mPtr; + } + + AK_FORCE_INLINE const int *end() const { + return mPtr + mSize; + } + private: DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); @@ -76,5 +98,8 @@ class IntArrayView { const size_t mSize; }; +using WordIdArrayView = IntArrayView; +using PtNodePosArrayView = IntArrayView; + } // namespace latinime #endif // LATINIME_MEMORY_VIEW_H |