diff options
Diffstat (limited to 'native/jni/src')
31 files changed, 345 insertions, 56 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index 1926b9831..de9fc9bbc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index b1170e251..d97501265 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -17,8 +17,8 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 0504b59d5..5de456656 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -22,7 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index a9c7805a8..7c06a7117 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -30,8 +30,8 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: // Reads information from existing dictionary buffer. - HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion) + : mDictFormatVersion(formatVersion), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index a28535fda..baa9cf142 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h index 8cebc3b16..8cebc3b16 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 9921e4bba..960c1b936 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -15,12 +15,12 @@ */ -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 4b4c39dfa..31e6186b7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -34,7 +35,7 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_2), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp index 7df55815f..82b3593c8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h index 8420ee95a..8420ee95a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index 5724c5d88..126b7681e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h index 9755120b0..ab59cc645 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "utils/hash_map_compat.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp index 2fa3111d3..4fd2484e1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h index 3b36d425f..fac078d0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h @@ -20,8 +20,8 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index 1d902d7c2..246458d09 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" #include <cstdio> #include <cstring> @@ -23,11 +23,11 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -55,9 +55,16 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { + bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted(); + if (isTerminal && mHeaderPolicy.isDecayingDict()) { + // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose + // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a + // valid terminal DicNode. + isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY) + != NOT_A_PROBABILITY; + } childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), - nodeReader->getChildrenPos(), nodeReader->getProbability(), - nodeReader->isTerminal() && !nodeReader->isDeleted(), + nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal, nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); readingHelper.readNextSiblingNode(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index 2c722e8ed..1a924c177 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -33,9 +34,10 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_3), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), - mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + mBuffer->getBufferSize() - mHeaderPolicy.getSize(), + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mShortcutListPolicy(&mBufferWithExtendableBuffer), mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index 91794266d..f3410affc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h index 90a223934..f8d32c9cd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h @@ -21,9 +21,9 @@ #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp index d68446db6..e94925365 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h index 67c3cc57e..67c3cc57e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index 052558bfc..d856c50f4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -149,7 +149,8 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + mBuffer->getUsedAdditionalBufferSize(); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, @@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy) { - BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, - MAX_DICTIONARY_SIZE); + BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE); int unigramCount = 0; int bigramCount = 0; if (mNeedsToDecay) { @@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) { return; } - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index ca8664729..ca8664729 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp index 30ff10cd6..67733660b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include <cstddef> #include <cstdlib> diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h index af76bc6b5..5654105ee 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h @@ -20,7 +20,7 @@ #include <cstddef> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp new file mode 100644 index 000000000..b9ee4891c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" + +namespace latinime { + +void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const { + // TODO: Implement. +} + +int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const { + // TODO: Implement. + return 0; +} + +int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + // TODO: Implement. + return NOT_A_PROBABILITY; +} + +int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_PROBABILITY; +} + +int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, + const int probability) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1, const int probability) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1) { + // TODO: Implement. + return false; +} + +void Ver4PatriciaTriePolicy::flush(const char *const filePath) { + // TODO: Implement. +} + +void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { + // TODO: Implement. +} + +bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { + // TODO: Implement. + return false; +} + +void Ver4PatriciaTriePolicy::getProperty(const char *const query, char *const outResult, + const int maxResultLength) { + // TODO: Implement. +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h new file mode 100644 index 000000000..86a4f8bce --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H +#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" + +namespace latinime { + +class DicNode; +class DicNodeVector; + +// TODO: Implement. +class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { + public: + ~Ver4PatriciaTriePolicy() {} + + AK_FORCE_INLINE int getRootPosition() const { + return 0; + } + + void createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const; + + int getCodePointsAndProbabilityAndReturnCodePointCount( + const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const; + + int getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const; + + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + + int getShortcutPositionOfPtNode(const int ptNodePos) const; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; + + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return 0; + } + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return 0; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return 0; + } + + bool addUnigramWord(const int *const word, const int length, const int probability); + + bool addBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1, const int probability); + + bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1); + + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC(const bool mindsBlockByGC) const; + + void getProperty(const char *const query, char *const outResult, + const int maxResultLength); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); +}; +} // namespace latinime +#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index f692882f2..5032131ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -18,7 +18,7 @@ namespace latinime { -const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; +const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90; // TODO: Needs to allocate larger memory corresponding to the current vector size. const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 9dc34823c..1e27a1bec 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -32,12 +32,20 @@ namespace latinime { // raw pointer but provides several methods that handle boundary checking for writing data. class BufferWithExtendableBuffer { public: + static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE; + BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, - const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE) + const int maxAdditionalBufferSize) : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} + // Without original buffer. + BufferWithExtendableBuffer(const int maxAdditionalBufferSize) + : mOriginalBuffer(0), mOriginalBufferSize(0), + mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), + mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} + AK_FORCE_INLINE int getTailPosition() const { return mOriginalBufferSize + mUsedAdditionalBufferSize; } @@ -86,7 +94,6 @@ class BufferWithExtendableBuffer { private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); - static const size_t MAX_ADDITIONAL_BUFFER_SIZE; static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 5f781d50b..b48e5b005 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -20,7 +20,7 @@ #include <cstring> #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" @@ -44,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); - BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer bodyBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h new file mode 100644 index 000000000..59b894fa6 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_FILE_UTILS_H +#define LATINIME_FILE_UTILS_H + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "defines.h" + +namespace latinime { + +class FileUtils { + public: + // Returns -1 on error. + static int getFileSize(const char *const filePath) { + const int fd = open(filePath, O_RDONLY); + if (fd == -1) { + return -1; + } + struct stat statBuf; + if (fstat(fd, &statBuf) != 0) { + close(fd); + return -1; + } + close(fd); + return static_cast<int>(statBuf.st_size); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils); +}; +} // namespace latinime +#endif /* LATINIME_FILE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 6b69116eb..82138355d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -18,18 +18,21 @@ #define LATINIME_MMAPPED_BUFFER_H #include <cerrno> +#include <climits> +#include <cstdio> #include <fcntl.h> #include <stdint.h> #include <sys/mman.h> #include <unistd.h> #include "defines.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" namespace latinime { class MmappedBuffer { public: - static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset, + static MmappedBuffer *openBuffer(const char *const path, const int bufferOffset, const int bufferSize, const bool isUpdatable) { const int openMode = isUpdatable ? O_RDWR : O_RDONLY; const int mmapFd = open(path, openMode); @@ -59,7 +62,34 @@ class MmappedBuffer { isUpdatable); } + // Mmap entire file. + static MmappedBuffer *openBuffer(const char *const path, const bool isUpdatable) { + const int fileSize = FileUtils::getFileSize(path); + if (fileSize == -1) { + return 0; + } else if (fileSize == 0) { + return new MmappedBuffer(isUpdatable); + } else { + return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable); + } + } + + static MmappedBuffer *openBuffer(const char *const dirPath, const char *const fileName, + const bool isUpdatable) { + const int filePathBufferSize = PATH_MAX + 1 /* terminator */; + char filePath[filePathBufferSize]; + const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath, + fileName); + if (filePathLength >= filePathBufferSize) { + return 0; + } + return openBuffer(filePath, isUpdatable); + } + ~MmappedBuffer() { + if (mAlignedSize == 0) { + return; + } int ret = munmap(mMmappedBuffer, mAlignedSize); if (ret != 0) { AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); @@ -89,6 +119,11 @@ class MmappedBuffer { : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer), mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {} + // Empty file. We have to handle an empty file as a valid part of a dictionary. + AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable) + : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0), + mIsUpdatable(isUpdatable) {} + DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer); uint8_t *const mBuffer; |