diff options
Diffstat (limited to 'native')
25 files changed, 527 insertions, 104 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index a5757fd58..b61a66ce6 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -68,12 +68,14 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ - bigram/bigram_list_read_write_utils.cpp \ - bigram/dynamic_bigram_list_policy.cpp \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ structure/dictionary_structure_with_buffer_policy_factory.cpp) \ + $(addprefix suggest/policyimpl/dictionary/bigram/, \ + bigram_list_read_write_utils.cpp \ + dynamic_bigram_list_policy.cpp \ + ver4_bigram_list_policy.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp) \ @@ -88,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_patricia_trie_writing_helper.cpp \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ + content/bigram_dict_content.cpp \ ver4_dict_constants.cpp \ ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_node_writer.cpp \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 3becc7e39..c4383d754 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -86,10 +86,10 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; - DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy( + DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy = DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), - isUpdatable == JNI_TRUE)); + isUpdatable == JNI_TRUE); if (!dictionaryStructureWithBufferPolicy.get()) { return 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp new file mode 100644 index 000000000..94d7f1061 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" + +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext, + &targetTerminalId, bigramEntryPos); + if (outBigramPos) { + // Lookup target PtNode position. + *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); + } +} + +bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, + const int newProbability, bool *const outAddedNewEntry) { + if (outAddedNewEntry) { + *outAddedNewEntry = false; + } + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Updating PtNode doesn't have a bigram list. + // Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + // Write an entry. + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + false /* hasNext */, newTargetTerminalId, &writingPos)) { + return false; + } + return true; + } + + const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); + if (entryPosToUpdate != NOT_A_DICT_POS) { + // Overwrite existing entry. + int readingPos = entryPosToUpdate; + bool hasNext = false; + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) { + // Reuse invalid entry. + *outAddedNewEntry = true; + } + int writingPos = entryPosToUpdate; + return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext, + newTargetTerminalId, &writingPos); + } + + // Add new entry to the bigram list. + // Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + // Write new entry at a head position of the bigram list. + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + true /* hasNext */, newTargetTerminalId, &writingPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + // Append existing entries by copying. + return mBigramDictContent->copyBigramList(bigramListPos, writingPos); +} + +bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list does't exist. + return false; + } + const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); + if (entryPosToUpdate == NOT_A_DICT_POS) { + // Bigram entry doesn't exist. + return false; + } + int readingPos = entryPosToUpdate; + bool hasNext = false; + int probability = NOT_A_PROBABILITY; + int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &originalTargetTerminalId, &readingPos); + if (targetTerminalId != originalTargetTerminalId) { + // Bigram entry doesn't exist. + return false; + } + int writingPos = entryPosToUpdate; + // Remove bigram entry by overwriting target terminal Id. + return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); +} + +int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, + const int bigramListPos) const { + bool hasNext = true; + int invalidEntryPos = NOT_A_DICT_POS; + int readingPos = bigramListPos; + while(hasNext) { + const int entryPos = readingPos; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == targetTerminalIdToFind) { + // Entry with same target is found. + return entryPos; + } else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + // Invalid entry that can be reused is found. + invalidEntryPos = entryPos; + } + } + return invalidEntryPos; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index 875a0ff9b..b3fe13d7d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -19,46 +19,37 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" namespace latinime { +class BigramDictContent; +class TerminalPositionLookupTable; + class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { public: - Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent, + Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, const TerminalPositionLookupTable *const terminalPositionLookupTable) : mBigramDictContent(bigramDictContent), mTerminalPositionLookupTable(terminalPositionLookupTable) {} void getNextBigram(int *const outBigramPos, int *const outProbability, - bool *const outHasNext, int *const bigramEntryPos) const { - int bigramFlags = 0; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId, - bigramEntryPos); - if (outProbability) { - *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); - } - if (outHasNext) { - *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); - } - if (outBigramPos) { - // Lookup target PtNode position. - *outBigramPos = - mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); - } - } + bool *const outHasNext, int *const bigramEntryPos) const; void skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. } + bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, + bool *const outAddedNewEntry); + + bool removeEntry(const int terminalId, const int targetTerminalId); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); - const BigramDictContent *const mBigramDictContent; + int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; + + BigramDictContent *const mBigramDictContent; const TerminalPositionLookupTable *const mTerminalPositionLookupTable; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 3ab6a8e21..063b84cbf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -35,8 +35,8 @@ namespace latinime { const int bufOffset, const int size, const bool isUpdatable) { // Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of // MmappedBufferWrapper if the instance has the responsibility. - MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size, - isUpdatable)); + MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size, + isUpdatable); if (!mmappedBuffer.get()) { return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); } @@ -58,8 +58,8 @@ namespace latinime { } // Removing extension to get the base path. dictDirPath.erase(pos); - const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( - Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer)); + const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = + Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer); if (!dictBuffers.get()->isValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements."); ASSERT(false); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index c3fe03d37..b3fdbeb78 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -238,6 +238,9 @@ int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int } // All characters are matched. if (length == getTotalCodePointCount(ptNodeParams)) { + if (!ptNodeParams.isTerminal()) { + return NOT_A_DICT_POS; + } // Terminal position is found. return ptNodeParams.getHeadPos(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp new file mode 100644 index 000000000..999460086 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" + +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability, + bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const { + const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); + const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); + if (outProbability) { + *outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; + } + if (outHasNext) { + *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; + } + const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + if (outTargetTerminalId) { + *outTargetTerminalId = + (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? + Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId; + } +} + +bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, + const int targetTerminalId, int *const entryWritingPos) { + BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); + const int bigramFlags = createAndGetBigramFlags(probability, hasNext); + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { + return false; + } + const int targetTerminalIdToWrite = + (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? + Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; + return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); +} + +bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) { + bool hasNext = true; + int readingPos = bigramListPos; + int writingPos = toPos; + while(hasNext) { + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId, + &readingPos); + if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, + &writingPos)) { + return false; + } + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index 5eed13e70..bc9e4b619 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -33,21 +33,15 @@ class BigramDictContent : public SparseTableDictContent { Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} - void getBigramEntryAndAdvancePosition(int *const outBigramFlags, - int *const outTargetTerminalId, int *const bigramEntryPos) const { - const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); - if (outBigramFlags) { - *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); - } - if (outTargetTerminalId) { - *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); - } - } + BigramDictContent() + : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + + void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, + int *const outTargetTerminalId, int *const bigramEntryPos) const; - // Returns head position of bigram list for a PtNode specified by terminalId. - int getBigramListHeadPos(const int terminalId) const { + // Returns head position of bigram list for a PtNode specified by terminalId. + int getBigramListHeadPos(const int terminalId) const { const SparseTable *const addressLookupTable = getAddressLookupTable(); if (!addressLookupTable->contains(terminalId)) { return NOT_A_DICT_POS; @@ -55,8 +49,23 @@ class BigramDictContent : public SparseTableDictContent { return addressLookupTable->get(terminalId); } + bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, + const int targetTerminalId, int *const entryWritingPos); + + bool createNewBigramList(const int terminalId) { + const int bigramListPos = getContentBuffer()->getTailPosition(); + return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); + } + + bool copyBigramList(const int bigramListPos, const int toPos); + private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent); + DISALLOW_COPY_AND_ASSIGN(BigramDictContent); + + int createAndGetBigramFlags(const int probability, const bool hasNext) const { + return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) + | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0); + } }; } // namespace latinime #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h index e85bbe18e..c109cbf51 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h @@ -31,6 +31,8 @@ class ProbabilityDictContent : public SingleDictContent { : SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable) {} + ProbabilityDictContent() {} + int getProbability(const int terminalId) const { if (terminalId < 0 || terminalId >= getSize()) { return NOT_A_PROBABILITY; @@ -61,7 +63,7 @@ class ProbabilityDictContent : public SingleDictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent); + DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); int getSize() const { return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h index c10fbcb2a..8463a1753 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h @@ -33,6 +33,10 @@ class ShortcutDictContent : public SparseTableDictContent { Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} + ShortcutDictContent() + : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} + void getShortcutEntryAndAdvancePosition(const int maxCodePointCount, int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags, int *const shortcutEntryPos) const { @@ -57,7 +61,7 @@ class ShortcutDictContent : public SparseTableDictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent); + DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent); }; } // namespace latinime #endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h index 4cb96da6a..7669c1eca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -31,12 +32,17 @@ class SingleDictContent : public DictContent { : mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)), mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0, mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0, - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mIsValid(mMmappedBuffer.get() != 0) {} + + SingleDictContent() + : mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mIsValid(true) {} virtual ~SingleDictContent() {} virtual bool isValid() const { - return mMmappedBuffer.get() != 0; + return mIsValid; } protected: @@ -49,10 +55,11 @@ class SingleDictContent : public DictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent); + DISALLOW_COPY_AND_ASSIGN(SingleDictContent); const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; BufferWithExtendableBuffer mExpandableContentBuffer; + const bool mIsValid; }; } // namespace latinime #endif /* LATINIME_SINGLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 71868e9ca..5ae5f0ff1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/sparse_table.h" @@ -49,20 +50,37 @@ class SparseTableDictContent : public DictContent { mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, - sparseTableBlockSize, sparseTableDataSize) {} + sparseTableBlockSize, sparseTableDataSize), + mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 + && mContentBuffer.get() != 0) {} + + SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize) + : mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0), + mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, + sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {} virtual ~SparseTableDictContent() {} virtual bool isValid() const { - return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 - && mContentBuffer.get() != 0; + return mIsValid; } protected: + SparseTable *getUpdatableAddressLookupTable() { + return &mAddressLookupTable; + } + const SparseTable *getAddressLookupTable() const { return &mAddressLookupTable; } + BufferWithExtendableBuffer *getWritableContentBuffer() { + return &mExpandableContentBuffer; + } + const BufferWithExtendableBuffer *getContentBuffer() const { return &mExpandableContentBuffer; } @@ -70,7 +88,6 @@ class SparseTableDictContent : public DictContent { private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); - // TODO: Have sparse table. const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer; const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer; const MmappedBuffer::MmappedBufferPtr mContentBuffer; @@ -78,6 +95,7 @@ class SparseTableDictContent : public DictContent { BufferWithExtendableBuffer mExpandableAddressTableBuffer; BufferWithExtendableBuffer mExpandableContentBuffer; SparseTable mAddressLookupTable; + const bool mIsValid; }; } // namespace latinime #endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index f6ced31b4..e016a2b5f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -38,6 +38,8 @@ class TerminalPositionLookupTable : public SingleDictContent { / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE), mHeaderRegionSize(headerRegionSize) {} + TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {} + int getTerminalPtNodePosition(const int terminalId) const { if (terminalId < 0 || terminalId >= mSize) { return NOT_A_DICT_POS; @@ -66,7 +68,7 @@ class TerminalPositionLookupTable : public SingleDictContent { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); + DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable); int mSize; const int mHeaderRegionSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 6476478e5..e468be591 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -33,27 +33,30 @@ class Ver4DictBuffers { public: typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr; - static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, + static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, const MmappedBuffer::MmappedBufferPtr &dictBuffer) { const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false; return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable)); } + static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers() { + return Ver4DictBuffersPtr(new Ver4DictBuffers()); + } + AK_FORCE_INLINE bool isValid() const { return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid() && mShortcutDictContent.isValid(); } - AK_FORCE_INLINE uint8_t *getRawDictBuffer() const { - return mDictBuffer.get()->getBuffer(); + AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() { + return &mExpandableHeaderBuffer; } - AK_FORCE_INLINE int getRawDictBufferSize() const { - return mDictBuffer.get()->getBufferSize(); + AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() { + return &mExpandableTrieBuffer; } - AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() { return &mTerminalPositionLookupTable; } @@ -70,6 +73,10 @@ class Ver4DictBuffers { return &mProbabilityDictContent; } + AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() { + return &mBigramDictContent; + } + AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { return &mBigramDictContent; } @@ -82,21 +89,41 @@ class Ver4DictBuffers { return mIsUpdatable; } + bool flush(const char *const dictDirPath) { + // TODO: Implement. + return false; + } + private: - DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); + DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath, const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable) : mDictBuffer(dictBuffer), - // TODO: Quit using getHeaderSize. - mTerminalPositionLookupTable(dictDirPath, isUpdatable, - HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), + mHeaderSize(HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), + mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderSize, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderSize, + dictBuffer.get()->getBufferSize() - mHeaderSize, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + // TODO: Quit using header size. + mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize), mProbabilityDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, isUpdatable), mIsUpdatable(isUpdatable) {} + AK_FORCE_INLINE Ver4DictBuffers() + : mDictBuffer(0), mHeaderSize(0), + mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mTerminalPositionLookupTable(), mProbabilityDictContent(), + mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {} + const MmappedBuffer::MmappedBufferPtr mDictBuffer; + const int mHeaderSize; + BufferWithExtendableBuffer mExpandableHeaderBuffer; + BufferWithExtendableBuffer mExpandableTrieBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; ProbabilityDictContent mProbabilityDictContent; BigramDictContent mBigramDictContent; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 941bcd594..af13a374a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -30,6 +30,10 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = ".shortcut_index_shortcut"; +// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets. +// TODO: Make MAX_DICTIONARY_SIZE 8MB. +const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; + const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; @@ -42,7 +46,13 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; +// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing +// invalid terminal ID in bigram lists. +const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID = + (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; +const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; +const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 7270d9e6e..cfb7740be 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -34,6 +34,8 @@ class Ver4DictConstants { static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; + static const int MAX_DICTIONARY_SIZE; + static const int NOT_A_TERMINAL_ID; static const int PROBABILITY_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE; @@ -47,6 +49,9 @@ class Ver4DictConstants { static const int BIGRAM_FLAGS_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; + static const int BIGRAM_PROBABILITY_MASK; + static const int BIGRAM_HAS_NEXT_MASK; static const int SHORTCUT_FLAGS_FIELD_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 8b0ea823e..b572ee87f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" -#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" @@ -167,8 +167,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { return false; } - // TODO: Implement bigram and shortcut writing. - // Create node flags and write them. PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), @@ -188,14 +186,14 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, bool *const outAddedNewBigram) { - // TODO: Implement. - return false; + return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram); } bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { - // TODO: Implement. - return false; + return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 520ffc080..698483a79 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -28,14 +28,14 @@ namespace latinime { const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = - DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; + Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); while (!readingHelper.isEnd()) { const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); @@ -63,7 +63,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodePos(ptNodePos); return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( maxCodePointCount, outCodePoints, outUnigramProbability); @@ -71,7 +71,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } @@ -135,12 +135,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } - if (mDictBuffer.getTailPosition() - >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { - AKLOGE("The dictionary is too large to dynamically update."); + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); return false; } - DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, @@ -156,14 +156,63 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, const int *const word1, const int length1, const int probability) { - // TODO: Implement. - return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + bool addedNewBigram = false; + if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } } bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - // TODO: Implement. - return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } } void Ver4PatriciaTriePolicy::flush(const char *const filePath) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index fdb7ac69b..e8fdf5513 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -38,18 +38,17 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) : mBuffers(buffers), - mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4), - mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), - mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), - BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mBigramPolicy(mBuffers.get()->getBigramDictContent(), + mHeaderPolicy(mBuffers.get()->getWritableHeaderBuffer()->getBuffer( + false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4), + mDictBuffer(mBuffers.get()->getWritableTrieBuffer()), + mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), - mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()), - mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, + mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), + mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, &mShortcutPolicy), - mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter, + mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter, mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()) {}; @@ -115,7 +114,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; - BufferWithExtendableBuffer mDictBuffer; + BufferWithExtendableBuffer *const mDictBuffer; Ver4BigramListPolicy mBigramPolicy; Ver4ShortcutListPolicy mShortcutPolicy; Ver4PatriciaTrieNodeReader mNodeReader; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index f17a0d1c0..26eafcd44 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -49,6 +49,11 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC } } +bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) { + int writingPos = pos; + return writeUintAndAdvancePosition(data, size, &writingPos); +} + bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { if (!(size >= 1 && size <= 4)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 13dce9b61..ee6107ad7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -93,6 +93,8 @@ class BufferWithExtendableBuffer { * Writing is allowed for original buffer, already written region of additional buffer and the * tail of additional buffer. */ + bool writeUint(const uint32_t data, const int size, const int pos); + bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos); bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index b48e5b005..40f7d1f5c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -21,6 +21,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" @@ -34,7 +35,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = case 3: return createEmptyV3DictFile(filePath, attributeMap); case 4: - // TODO: Support version 4 dictionary format. + return createEmptyV4DictFile(filePath, attributeMap); return false; default: // Only version 3 dictionary is supported for now. @@ -58,6 +59,20 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); } +/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers(); + HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap); + headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(), + true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */, + 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( + dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) { + return false; + } + return dictBuffers.get()->flush(filePath); +} + /* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { const int tmpFileNameBufSize = strlen(filePath) @@ -69,21 +84,21 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); FILE *const file = fopen(tmpFileName, "wb"); if (!file) { - AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + AKLOGE("Dictionary file %s cannot be opened.", tmpFileName); ASSERT(false); return false; } // Write the dictionary header. if (!writeBufferToFile(file, dictHeader)) { remove(tmpFileName); - AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); + AKLOGE("Dictionary header cannot be written. size: %d", dictHeader->getTailPosition()); ASSERT(false); return false; } // Write the dictionary body. if (!writeBufferToFile(file, dictBody)) { remove(tmpFileName); - AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); + AKLOGE("Dictionary body cannot be written. size: %d", dictBody->getTailPosition()); ASSERT(false); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h index bd4ac66fd..3291f98c7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -43,6 +43,9 @@ class DictFileWritingUtils { static bool createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static bool createEmptyV4DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static bool writeBufferToFile(FILE *const file, const BufferWithExtendableBuffer *const buffer); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp index 2678b8c7b..9be35620c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp @@ -19,23 +19,68 @@ namespace latinime { const int SparseTable::NOT_EXIST = -1; +const int SparseTable::INDEX_SIZE = 4; bool SparseTable::contains(const int id) const { - const int readingPos = id / mBlockSize * mDataSize; + const int readingPos = getPosInIndexTable(id); if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) { return false; } - const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos); return index != NOT_EXIST; } uint32_t SparseTable::get(const int id) const { - const int indexTableIndex = id / mBlockSize; - int readingPos = indexTableIndex * mDataSize; - const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); + const int indexTableReadingPos = getPosInIndexTable(id); + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos); + const int contentTableReadingPos = getPosInContentTable(id, index); + return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos); +} + +bool SparseTable::set(const int id, const uint32_t value) { + const int posInIndexTable = getPosInIndexTable(id); + // Extends the index table if needed. + if (mIndexTableBuffer->getTailPosition() < posInIndexTable) { + int tailPos = mIndexTableBuffer->getTailPosition(); + while(tailPos < posInIndexTable) { + if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) { + return false; + } + } + } + if (contains(id)) { + // The entry is already in the content table. + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable); + return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); + } + // The entry is not in the content table. + // Create new entry in the content table. + const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition()); + if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) { + return false; + } + // Write a new block that containing the entry to be set. + int writingPos = getPosInContentTable(0 /* id */, index); + for (int i = 0; i < mBlockSize; ++i) { + if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize, + &writingPos)) { + return false; + } + } + return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); +} + +int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const { + return contentTablePos / mDataSize / mBlockSize; +} + +int SparseTable::getPosInIndexTable(const int id) const { + return (id / mBlockSize) * INDEX_SIZE; +} + +int SparseTable::getPosInContentTable(const int id, const int index) const { const int offset = id % mBlockSize; - readingPos = (index * mDataSize + offset) * mBlockSize; - return mContentTableBuffer->readUint(mDataSize, readingPos); + return (index * mDataSize + offset) * mBlockSize; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h index d71756c63..21c167506 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h @@ -38,10 +38,19 @@ class SparseTable { uint32_t get(const int id) const; + bool set(const int id, const uint32_t value); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable); + int getIndexFromContentTablePos(const int contentTablePos) const; + + int getPosInIndexTable(const int id) const; + + int getPosInContentTable(const int id, const int index) const; + static const int NOT_EXIST; + static const int INDEX_SIZE; BufferWithExtendableBuffer *const mIndexTableBuffer; BufferWithExtendableBuffer *const mContentTableBuffer; |