diff options
17 files changed, 830 insertions, 524 deletions
diff --git a/java/res/drawable-hdpi/keyboard_popup_panel_background_holo.9.png b/java/res/drawable-hdpi/keyboard_popup_panel_background_holo.9.png Binary files differindex dc2fc7dfc..f9dd3b8b1 100644 --- a/java/res/drawable-hdpi/keyboard_popup_panel_background_holo.9.png +++ b/java/res/drawable-hdpi/keyboard_popup_panel_background_holo.9.png diff --git a/java/res/drawable-mdpi/keyboard_popup_panel_background_holo.9.png b/java/res/drawable-mdpi/keyboard_popup_panel_background_holo.9.png Binary files differindex 441edc30b..896505518 100644 --- a/java/res/drawable-mdpi/keyboard_popup_panel_background_holo.9.png +++ b/java/res/drawable-mdpi/keyboard_popup_panel_background_holo.9.png diff --git a/java/res/drawable-xhdpi/keyboard_popup_panel_background_holo.9.png b/java/res/drawable-xhdpi/keyboard_popup_panel_background_holo.9.png Binary files differindex dde1856e3..36df715b6 100644 --- a/java/res/drawable-xhdpi/keyboard_popup_panel_background_holo.9.png +++ b/java/res/drawable-xhdpi/keyboard_popup_panel_background_holo.9.png diff --git a/java/res/drawable-xxhdpi/keyboard_popup_panel_background_holo.9.png b/java/res/drawable-xxhdpi/keyboard_popup_panel_background_holo.9.png Binary files differindex ca576deaf..91d5d7f90 100644 --- a/java/res/drawable-xxhdpi/keyboard_popup_panel_background_holo.9.png +++ b/java/res/drawable-xxhdpi/keyboard_popup_panel_background_holo.9.png diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 333688015..35fa352d9 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -80,9 +80,11 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/structure/v3/, \ dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_node_reader.cpp \ + dynamic_patricia_trie_node_writer.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_helper.cpp \ dynamic_patricia_trie_reading_utils.cpp \ + dynamic_patricia_trie_updating_helper.cpp \ dynamic_patricia_trie_writing_helper.cpp \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index edc7b954c..6a4cfee3c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -86,6 +86,38 @@ class PtNodeParams { memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); } + // Construct new params by updating existing PtNode params. + PtNodeParams(const PtNodeParams *const ptNodeParams, + const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) + : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()), + mTerminalId(ptNodeParams->getTerminalId()), + mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()), + mProbability(probability), + mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()), + mChildrenPos(ptNodeParams->getChildrenPos()), + mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()), + mShortcutPos(ptNodeParams->getShortcutPos()), + mBigramPos(ptNodeParams->getBigramsPos()), + mSiblingPos(ptNodeParams->getSiblingNodePos()) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + + PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) + : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), + mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), + mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + AK_FORCE_INLINE bool isValid() const { return mCodePointCount > 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h new file mode 100644 index 000000000..0f1e635bc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_WRITER_H +#define LATINIME_PT_NODE_WRITER_H + +#include "defines.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" + +namespace latinime { + +// Interface class used to write PtNode information. +class PtNodeWriter { + public: + virtual ~PtNodeWriter() {} + + virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; + + virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos) = 0; + + virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int probability) = 0; + + virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newChildrenPosition) = 0; + + virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos) = 0; + + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram) = 0; + + virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam) = 0; + + protected: + PtNodeWriter() {}; + + private: + DISALLOW_COPY_AND_ASSIGN(PtNodeWriter); +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index db4e86da1..a252c6fb2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -18,6 +18,8 @@ #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -54,7 +56,7 @@ bool DynamicPatriciaTrieGcEventListeners } if (isUselessPtNode) { // Current PtNode is no longer needed. Mark it as deleted. - if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) { + if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) { return false; } } else { @@ -130,9 +132,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo ptNodeParams->getHeadPos(), writingPos)); mValidPtNodeCount++; // Writes current PtNode. - return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams, - ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(), - ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos); + return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos); } bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h index cfe3c145c..a0b3b4d3a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h @@ -23,13 +23,13 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "utils/hash_map_compat.h" namespace latinime { class DictionaryHeaderStructurePolicy; +class PtNodeWriter; class PtNodeParams; class DynamicPatriciaTrieGcEventListeners { @@ -42,9 +42,9 @@ class DynamicPatriciaTrieGcEventListeners { public: TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( const DictionaryHeaderStructurePolicy *const headerPolicy, - DynamicPatriciaTrieWritingHelper *const writingHelper, - BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) - : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer), + PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer, + const bool isDecayingDict) + : mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer), mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} @@ -78,7 +78,7 @@ class DynamicPatriciaTrieGcEventListeners { TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); const DictionaryHeaderStructurePolicy *const mHeaderPolicy; - DynamicPatriciaTrieWritingHelper *const mWritingHelper; + PtNodeWriter *const mPtNodeWriter; BufferWithExtendableBuffer *const mBuffer; const bool mIsDecayingDict; std::vector<int> mValueStack; @@ -118,11 +118,10 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToPlaceAndWriteValidPtNodesToBuffer( - DynamicPatriciaTrieWritingHelper *const writingHelper, - BufferWithExtendableBuffer *const bufferToWrite, + PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite, DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const dictPositionRelocationMap) - : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite), + : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite), mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0), mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {}; @@ -137,7 +136,7 @@ class DynamicPatriciaTrieGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); - DynamicPatriciaTrieWritingHelper *const mWritingHelper; + PtNodeWriter *const mPtNodeWriter; BufferWithExtendableBuffer *const mBufferToWrite; DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const mDictPositionRelocationMap; @@ -149,13 +148,11 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToUpdateAllPositionFields( - DynamicPatriciaTrieWritingHelper *const writingHelper, DynamicBigramListPolicy *const bigramPolicy, BufferWithExtendableBuffer *const bufferToWrite, const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const dictPositionRelocationMap) - : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), - mBufferToWrite(bufferToWrite), + : mBigramPolicy(bigramPolicy), mBufferToWrite(bufferToWrite), mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0), mBigramCount(0) {}; @@ -178,7 +175,6 @@ class DynamicPatriciaTrieGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); - DynamicPatriciaTrieWritingHelper *const mWritingHelper; DynamicBigramListPolicy *const mBigramPolicy; BufferWithExtendableBuffer *const mBufferToWrite; const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp new file mode 100644 index 000000000..3b1e16cc7 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h" + +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +const int DynamicPatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; + +bool DynamicPatriciaTrieNodeWriter::markPtNodeAsDeleted( + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, + true /* isDeleted */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); +} + +bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, + false /* isDeleted */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos)) { + return false; + } + // Update moved position, which is stored in the parent offset field. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { + return false; + } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } + if (toBeUpdatedPtNodeParams->hasChildren()) { + // Update children's parent position. + mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); + while (!mReadingHelper.isEnd()) { + const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams()); + int parentOffsetFieldPos = childPtNodeParams.getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), + &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. + return false; + } + mReadingHelper.readNextSiblingNode(childPtNodeParams); + } + } + return true; +} + +bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) { + if (!toBeUpdatedPtNodeParams->isTerminal()) { + return false; + } + int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos(); + return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + newProbability, &probabilityFieldPos); +} + +bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) { + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + newChildrenPosition, &childrenPosFieldPos); +} + +bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { + const int nodePos = *ptNodeWritingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, + 0 /* nodeFlags */, ptNodeWritingPos)) { + return false; + } + // Calculate a parent offset and write the offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer, + ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { + return false; + } + // Write code points + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, + ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return false; + } + // Write probability when the probability is a valid probability, which means this node is + // terminal. + if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) { + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + ptNodeParams->getProbability(), ptNodeWritingPos)) { + return false; + } + } + // Write children position + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + return false; + } + // Copy shortcut list when the originalShortcutListPos is valid dictionary position. + if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) { + int fromPos = ptNodeParams->getShortcutPos(); + if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos, + ptNodeWritingPos)) { + return false; + } + } + // Copy bigram list when the originalBigramListPos is valid dictionary position. + int bigramCount = 0; + if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) { + int fromPos = ptNodeParams->getBigramsPos(); + if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) { + return false; + } + } + // Create node flags and write them. + PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), + ptNodeParams->isNotAWord(), + ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */, + ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, + bigramCount > 0 /* hasBigrams */, + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } + return true; +} + +bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry( + const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram) { + const int newNodePos = mBuffer->getTailPosition(); + int writingPos = newNodePos; + // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. + if (!writePtNodeAndAdvancePosition(sourcePtNodeParams, &writingPos)) { + return false; + } + if (!markPtNodeAsMoved(sourcePtNodeParams, newNodePos, newNodePos)) { + return false; + } + const PtNodeParams newPtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(newNodePos)); + if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) { + // Insert a new bigram entry into the existing bigram list. + int bigramListPos = newPtNodeParams.getBigramsPos(); + return mBigramPolicy->addNewBigramEntryToBigramList(targetPtNodeParam->getHeadPos(), + probability, &bigramListPos, outAddedNewBigram); + } else { + // The PtNode doesn't have a bigram list. + *outAddedNewBigram = true; + // First, Write a bigram entry at the tail position of the PtNode. + if (!mBigramPolicy->writeNewBigramEntry(targetPtNodeParam->getHeadPos(), probability, + &writingPos)) { + return false; + } + // Then, Mark as the PtNode having bigram list in the flags. + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(), + newPtNodeParams.isNotAWord(), + newPtNodeParams.getProbability() != NOT_A_PROBABILITY, + newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + writingPos = newNodePos; + // Write updated flags into the moved PtNode's flags field. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); + } +} + +bool DynamicPatriciaTrieNodeWriter::removeBigramEntry( + const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { + if (sourcePtNodeParams->getBigramsPos() == NOT_A_DICT_POS) { + return false; + } + return mBigramPolicy->removeBigram(sourcePtNodeParams->getBigramsPos(), + targetPtNodeParam->getHeadPos()); +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h new file mode 100644 index 000000000..87ed1b159 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DynamicBigramListPolicy; +class DynamicShortcutListPolicy; + +/* + * This class is used for helping to writes nodes of dynamic patricia trie. + */ +class DynamicPatriciaTrieNodeWriter : public PtNodeWriter { + public: + DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer, + const DynamicPatriciaTrieNodeReader *const ptNodeReader, + DynamicBigramListPolicy *const bigramPolicy, + DynamicShortcutListPolicy *const shortcutPolicy) + : mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader), + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + + virtual ~DynamicPatriciaTrieNodeWriter() {} + + virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); + + virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos); + + virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newProbability); + + virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newChildrenPosition); + + virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos); + + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram); + + virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam); + + private: + DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter); + + static const int CHILDREN_POSITION_FIELD_SIZE; + + BufferWithExtendableBuffer *const mBuffer; + const DynamicPatriciaTrieNodeReader *const mPtNodeReader; + DynamicPatriciaTrieReadingHelper mReadingHelper; + DynamicBigramListPolicy *const mBigramPolicy; + DynamicShortcutListPolicy *const mShortcutPolicy; + +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index 205d181e7..6b58f5fb7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -27,6 +27,7 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -152,10 +153,8 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewUnigram = false; - if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, + if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, &addedNewUnigram)) { if (addedNewUnigram) { mUnigramCount++; @@ -187,10 +186,8 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int if (word1Pos == NOT_A_DICT_POS) { return false; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewBigram = false; - if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -221,9 +218,7 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const if (word1Pos == NOT_A_DICT_POS) { return false; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); - if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { + if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { mBigramCount--; return true; } else { @@ -236,8 +231,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary."); return; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } @@ -251,8 +246,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, needsToDecay); - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); mNeedsToDecayForTesting = false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index 7a81a9c0a..454698430 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -23,6 +23,8 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -46,6 +48,10 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy), + mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy, + &mShortcutListPolicy), + mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter, + mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} @@ -117,6 +123,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicShortcutListPolicy mShortcutListPolicy; DynamicBigramListPolicy mBigramListPolicy; DynamicPatriciaTrieNodeReader mNodeReader; + DynamicPatriciaTrieNodeWriter mNodeWriter; + DynamicPatriciaTrieUpdatingHelper mUpdatingHelper; int mUnigramCount; int mBigramCount; int mNeedsToDecayForTesting; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp new file mode 100644 index 000000000..cbc0d2304 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp @@ -0,0 +1,279 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { + +const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; + +bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord( + DynamicPatriciaTrieReadingHelper *const readingHelper, + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram) { + int parentPos = NOT_A_DICT_POS; + while (!readingHelper->isEnd()) { + const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } + const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); + if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */, + wordCodePoints[matchedCodePointCount])) { + // The first code point is different from target code point. Skip this node and read + // the next sibling node. + readingHelper->readNextSiblingNode(ptNodeParams); + continue; + } + // Check following merged node code points. + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); + for (int j = 1; j < nodeCodePointCount; ++j) { + const int nextIndex = matchedCodePointCount + j; + if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, + wordCodePoints[matchedCodePointCount + j])) { + *outAddedNewUnigram = true; + return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, + probability), + wordCodePoints + matchedCodePointCount, + codePointCount - matchedCodePointCount); + } + } + // All characters are matched. + if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { + return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram); + } + if (!ptNodeParams.hasChildren()) { + *outAddedNewUnigram = true; + return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), + wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), + codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); + } + // Advance to the children nodes. + parentPos = ptNodeParams.getHeadPos(); + readingHelper->readChildNode(ptNodeParams); + } + if (readingHelper->isError()) { + // The dictionary is invalid. + return false; + } + int pos = readingHelper->getPosOfLastForwardLinkField(); + *outAddedNewUnigram = true; + return createAndInsertNodeIntoPtNodeArray(parentPos, + wordCodePoints + readingHelper->getPrevTotalCodePointCount(), + codePointCount - readingHelper->getPrevTotalCodePointCount(), + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); +} + +bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, + const int probability, bool *const outAddedNewBigram) { + const PtNodeParams sourcePtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); + const PtNodeParams targetPtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos)); + return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability, + outAddedNewBigram); +} + +// Remove a bigram relation from word0Pos to word1Pos. +bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { + const PtNodeParams sourcePtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); + const PtNodeParams targetPtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos)); + return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams); +} + +bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, + const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, + int *const forwardLinkFieldPos) { + const int newPtNodeArrayPos = mBuffer->getTailPosition(); + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + newPtNodeArrayPos, forwardLinkFieldPos)) { + return false; + } + return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, + probability); +} + +bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability( + const PtNodeParams *const originalPtNodeParams, const int probability, + bool *const outAddedNewUnigram) { + if (originalPtNodeParams->isTerminal()) { + // Overwrites the probability. + *outAddedNewUnigram = false; + const int probabilityToWrite = getUpdatedProbability( + originalPtNodeParams->getProbability(), probability); + return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite); + } else { + // Make the node terminal and write the probability. + *outAddedNewUnigram = true; + const int movedPos = mBuffer->getTailPosition(); + int writingPos = movedPos; + const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, + originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), + originalPtNodeParams->getCodePoints(), + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability))); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { + return false; + } + if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { + return false; + } + } + return true; +} + +bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( + const PtNodeParams *const parentPtNodeParams, const int probability, + const int *const codePoints, const int codePointCount) { + const int newPtNodeArrayPos = mBuffer->getTailPosition(); + if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { + return false; + } + return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, + codePointCount, probability); +} + +bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( + const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, + const int probability) { + int writingPos = mBuffer->getTailPosition(); + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, + 1 /* arraySize */, &writingPos)) { + return false; + } + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( + parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { + return false; + } + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + return true; +} + +// Returns whether the dictionary updating was succeeded or not. +bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( + const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, + const int probabilityOfNewPtNode, const int *const newNodeCodePoints, + const int newNodeCodePointCount) { + // When addsExtraChild is true, split the reallocating PtNode and add new child. + // Reallocating PtNode: abcde, newNode: abcxy. + // abc (1st, not terminal) __ de (2nd) + // \_ xy (extra child, terminal) + // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. + // Reallocating PtNode: abcde, newNode: abc. + // abc (1st, terminal) __ de (2nd) + const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; + const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); + int writingPos = firstPartOfReallocatedPtNodePos; + // Write the 1st part of the reallocating node. The children position will be updated later + // with actual children position. + const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( + reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, + reallocatingPtNodeParams->getCodePoints(), newProbability)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { + return false; + } + const int actualChildrenPos = writingPos; + // Create new children PtNode array. + const size_t newPtNodeCount = addsExtraChild ? 2 : 1; + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, + newPtNodeCount, &writingPos)) { + return false; + } + // Write the 2nd part of the reallocating node. + const int secondPartOfReallocatedPtNodePos = writingPos; + const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, + firstPartOfReallocatedPtNodePos, + reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, + reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, + reallocatingPtNodeParams->getProbability())); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) { + return false; + } + if (addsExtraChild) { + const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( + firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, + newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) { + return false; + } + } + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + // Update original reallocating PtNode as moved. + if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, + secondPartOfReallocatedPtNodePos)) { + return false; + } + // Load node info. Information of the 1st part will be fetched. + const PtNodeParams ptNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); + // Update children position. + return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos); +} + +int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability, + const int newProbability) const { + if (mNeedsToDecay) { + return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, + newProbability); + } else { + return newProbability; + } +} + +const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams( + const PtNodeParams *const originalPtNodeParams, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) const { + const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( + originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), + probability != NOT_A_PROBABILITY /* isTerminal */, + originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, + originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */, + codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); + return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, + probability); +} + +const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode( + const int parentPos, const int codePointCount, const int *const codePoints, + const int probability) const { + const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( + false /* isBlacklisted */, false /* isNotAWord */, + probability != NOT_A_PROBABILITY /* isTerminal */, + false /* hasShortcutTargets */, false /* hasBigrams */, + codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); + return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h new file mode 100644 index 000000000..b9800cd80 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "utils/hash_map_compat.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DynamicPatriciaTrieReadingHelper; +class PtNodeReader; +class PtNodeWriter; + +// TODO: Move to pt_common. +class DynamicPatriciaTrieUpdatingHelper { + public: + DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer, + const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter, + const bool needsToDecay) + : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter), + mNeedsToDecay(needsToDecay) {} + + ~DynamicPatriciaTrieUpdatingHelper() {} + + // Add a word to the dictionary. If the word already exists, update the probability. + bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, + const int *const wordCodePoints, const int codePointCount, const int probability, + bool *const outAddedNewUnigram); + + // Add a bigram relation from word0Pos to word1Pos. + bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, + bool *const outAddedNewBigram); + + // Remove a bigram relation from word0Pos to word1Pos. + bool removeBigramWords(const int word0Pos, const int word1Pos); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieUpdatingHelper); + + static const int CHILDREN_POSITION_FIELD_SIZE; + + BufferWithExtendableBuffer *const mBuffer; + const PtNodeReader *const mPtNodeReader; + PtNodeWriter *const mPtNodeWriter; + const bool mNeedsToDecay; + + bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, + const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); + + bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability, + bool *const outAddedNewUnigram); + + bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, + const int probability, const int *const codePoints, const int codePointCount); + + bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, + const int nodeCodePointCount, const int probability); + + bool reallocatePtNodeAndAddNewPtNodes( + const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, + const int probabilityOfNewPtNode, const int *const newNodeCodePoints, + const int newNodeCodePointCount); + + int getUpdatedProbability(const int originalProbability, const int newProbability) const; + + const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, + const int parentPos, const int codePointCount, const int *const codePoints, + const int probability) const; + + const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount, + const int *const codePoints, const int probability) const; +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index 05caaebac..d97cec53b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -17,9 +17,12 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" @@ -31,122 +34,9 @@ namespace latinime { -const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; // TODO: Make MAX_DICTIONARY_SIZE 8MB. const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; -bool DynamicPatriciaTrieWritingHelper::addUnigramWord( - DynamicPatriciaTrieReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability, - bool *const outAddedNewUnigram) { - int parentPos = NOT_A_DICT_POS; - while (!readingHelper->isEnd()) { - const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); - if (!ptNodeParams.isValid()) { - break; - } - const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); - if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */, - wordCodePoints[matchedCodePointCount])) { - // The first code point is different from target code point. Skip this node and read - // the next sibling node. - readingHelper->readNextSiblingNode(ptNodeParams); - continue; - } - // Check following merged node code points. - const int nodeCodePointCount = ptNodeParams.getCodePointCount(); - for (int j = 1; j < nodeCodePointCount; ++j) { - const int nextIndex = matchedCodePointCount + j; - if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, - wordCodePoints[matchedCodePointCount + j])) { - *outAddedNewUnigram = true; - return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, - probability), - wordCodePoints + matchedCodePointCount, - codePointCount - matchedCodePointCount); - } - } - // All characters are matched. - if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { - return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram); - } - if (!ptNodeParams.hasChildren()) { - *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), - wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), - codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); - } - // Advance to the children nodes. - parentPos = ptNodeParams.getHeadPos(); - readingHelper->readChildNode(ptNodeParams); - } - if (readingHelper->isError()) { - // The dictionary is invalid. - return false; - } - int pos = readingHelper->getPosOfLastForwardLinkField(); - *outAddedNewUnigram = true; - return createAndInsertNodeIntoPtNodeArray(parentPos, - wordCodePoints + readingHelper->getPrevTotalCodePointCount(), - codePointCount - readingHelper->getPrevTotalCodePointCount(), - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); -} - -bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, - const int probability, bool *const outAddedNewBigram) { - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); - // Move node to add bigram entry. - const int newNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) { - return false; - } - int writingPos = newNodePos; - // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(), - ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(), - ptNodeParams.getProbability(), &writingPos)) { - return false; - } - const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos)); - if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) { - // Insert a new bigram entry into the existing bigram list. - int bigramListPos = newPtNodeParams.getBigramsPos(); - return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, - outAddedNewBigram); - } else { - // The PtNode doesn't have a bigram list. - *outAddedNewBigram = true; - // First, Write a bigram entry at the tail position of the PtNode. - if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { - return false; - } - // Then, Mark as the PtNode having bigram list in the flags. - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(), - newPtNodeParams.isNotAWord(), - newPtNodeParams.getProbability() != NOT_A_PROBABILITY, - newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, - newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); - writingPos = newNodePos; - // Write updated flags into the moved PtNode's flags field. - return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, - &writingPos); - } -} - -// Remove a bigram relation from word0Pos to word1Pos. -bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); - if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) { - return false; - } - return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos); -} - void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { BufferWithExtendableBuffer headerBuffer( @@ -180,323 +70,17 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); } -bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( - const PtNodeParams *const toBeUpdatedPtNodeParams) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, - true /* isDeleted */); - int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - // Update flags. - return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, - &writingPos); -} - -bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, - const int bigramLinkedNodePos) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, - false /* isDeleted */); - int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - // Update flags. - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, - &writingPos)) { - return false; - } - // Update moved position, which is stored in the parent offset field. - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { - return false; - } - // Update bigram linked node position, which is stored in the children position field. - int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( - mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { - return false; - } - if (toBeUpdatedPtNodeParams->hasChildren()) { - // Update children's parent position. - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); - readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); - while (!readingHelper.isEnd()) { - const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams()); - int parentOffsetFieldPos = childPtNodeParams.getHeadPos() - + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), - &parentOffsetFieldPos)) { - // Parent offset cannot be written because of a bug or a broken dictionary; thus, - // we give up to update dictionary. - return false; - } - readingHelper.readNextSiblingNode(childPtNodeParams); - } - } - return true; -} - -// Write new PtNode at writingPos. -bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer( - BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted, - const bool isNotAWord, const int parentPos, const int *const codePoints, - const int codePointCount, const int probability, const int childrenPos, - const int originalBigramListPos, const int originalShortcutListPos, - int *const writingPos) { - const int nodePos = *writingPos; - // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the - // PtNode writing. - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, - 0 /* nodeFlags */, writingPos)) { - return false; - } - // Calculate a parent offset and write the offset. - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite, - parentPos, nodePos, writingPos)) { - return false; - } - // Write code points - if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite, - codePoints, codePointCount, writingPos)) { - return false; - } - // Write probability when the probability is a valid probability, which means this node is - // terminal. - if (probability != NOT_A_PROBABILITY) { - if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite, - probability, writingPos)) { - return false; - } - } - // Write children position - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite, - childrenPos, writingPos)) { - return false; - } - // Copy shortcut list when the originalShortcutListPos is valid dictionary position. - if (originalShortcutListPos != NOT_A_DICT_POS) { - int fromPos = originalShortcutListPos; - if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos, - writingPos)) { - return false; - } - } - // Copy bigram list when the originalBigramListPos is valid dictionary position. - int bigramCount = 0; - if (originalBigramListPos != NOT_A_DICT_POS) { - int fromPos = originalBigramListPos; - if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) { - return false; - } - } - // Create node flags and write them. - PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, - probability != NOT_A_PROBABILITY /* isTerminal */, - originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */, - bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, - CHILDREN_POSITION_FIELD_SIZE); - int flagsFieldPos = nodePos; - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags, - &flagsFieldPos)) { - return false; - } - return true; -} - -bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer( - BufferWithExtendableBuffer *const bufferToWrite, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos) { - return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */, - false /* isNotAWord */, parentPos, codePoints, codePointCount, probability, - NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, - NOT_A_DICT_POS /* originalShortcutPos */, writingPos); -} - -bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( - BufferWithExtendableBuffer *const bufferToWrite, - const PtNodeParams *const originalPtNodeParams, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos) { - return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(), - originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability, - originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(), - originalPtNodeParams->getShortcutPos(), writingPos); -} - -bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, - const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, - int *const forwardLinkFieldPos) { - const int newPtNodeArrayPos = mBuffer->getTailPosition(); - if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, - newPtNodeArrayPos, forwardLinkFieldPos)) { - return false; - } - return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, - probability); -} - -bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( - const PtNodeParams *const originalPtNodeParams, const int probability, - bool *const outAddedNewUnigram) { - if (originalPtNodeParams->isTerminal()) { - // Overwrites the probability. - *outAddedNewUnigram = false; - const int probabilityToWrite = getUpdatedProbability( - originalPtNodeParams->getProbability(), probability); - int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, - probabilityToWrite, &probabilityFieldPos)) { - return false; - } - } else { - // Make the node terminal and write the probability. - *outAddedNewUnigram = true; - int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) { - return false; - } - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams, - originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(), - originalPtNodeParams->getCodePointCount(), - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), - &movedPos)) { - return false; - } - } - return true; -} - -bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( - const PtNodeParams *const parentPtNodeParams, const int probability, - const int *const codePoints, const int codePointCount) { - const int newPtNodeArrayPos = mBuffer->getTailPosition(); - int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, - newPtNodeArrayPos, &childrenPosFieldPos)) { - return false; - } - return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, - codePointCount, probability); -} - -bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( - const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, - const int probability) { - int writingPos = mBuffer->getTailPosition(); - if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, - 1 /* arraySize */, &writingPos)) { - return false; - } - if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount, - probability, &writingPos)) { - return false; - } - if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, - NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { - return false; - } - return true; -} - -// Returns whether the dictionary updating was succeeded or not. -bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( - const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, - const int probabilityOfNewPtNode, const int *const newNodeCodePoints, - const int newNodeCodePointCount) { - // When addsExtraChild is true, split the reallocating PtNode and add new child. - // Reallocating PtNode: abcde, newNode: abcxy. - // abc (1st, not terminal) __ de (2nd) - // \_ xy (extra child, terminal) - // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. - // Reallocating PtNode: abcde, newNode: abc. - // abc (1st, terminal) __ de (2nd) - const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; - const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); - int writingPos = firstPartOfReallocatedPtNodePos; - // Write the 1st part of the reallocating node. The children position will be updated later - // with actual children position. - const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; - if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(), - reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability, - &writingPos)) { - return false; - } - const int actualChildrenPos = writingPos; - // Create new children PtNode array. - const size_t newPtNodeCount = addsExtraChild ? 2 : 1; - if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, - newPtNodeCount, &writingPos)) { - return false; - } - // Write the 2nd part of the reallocating node. - const int secondPartOfReallocatedPtNodePos = writingPos; - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams, - firstPartOfReallocatedPtNodePos, - reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, - reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, - reallocatingPtNodeParams->getProbability(), &writingPos)) { - return false; - } - if (addsExtraChild) { - if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos, - newNodeCodePoints + overlappingCodePointCount, - newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, - &writingPos)) { - return false; - } - } - if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, - NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { - return false; - } - // Update original reallocating PtNode as moved. - if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, - secondPartOfReallocatedPtNodePos)) { - return false; - } - // Load node info. Information of the 1st part will be fetched. - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - const PtNodeParams ptNodeParams( - nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); - // Update children position. - int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, - actualChildrenPos, &childrenPosFieldPos)) { - return false; - } - return true; -} - +// TODO: Make this method version independent. bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount) { - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); + DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - headerPolicy, this, mBuffer, mNeedsToDecay); + headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; @@ -521,8 +105,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Mapping from positions in mBuffer to positions in bufferToWrite. DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, + &ptNodeReader, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite, &dictPositionRelocationMap); if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { @@ -539,7 +125,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields - traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, + traversePolicyToUpdateAllPositionFields(&newDictBigramPolicy, bufferToWrite, &dictPositionRelocationMap); if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToUpdateAllPositionFields)) { @@ -550,14 +136,4 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return true; } -int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, - const int newProbability) { - if (mNeedsToDecay) { - return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, - newProbability); - } else { - return newProbability; - } -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index 5614cb3ac..7223dea8b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h @@ -29,7 +29,8 @@ class DynamicBigramListPolicy; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; class HeaderPolicy; -class PtNodeParams; +class PtNodeReader; +class PtNodeWriter; // TODO: Make it independent from a particular format and move to pt_common. class DynamicPatriciaTrieWritingHelper { @@ -51,87 +52,34 @@ class DynamicPatriciaTrieWritingHelper { static const size_t MAX_DICTIONARY_SIZE; DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, + const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter, DynamicBigramListPolicy *const bigramPolicy, DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) - : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter), + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), mNeedsToDecay(needsToDecay) {} ~DynamicPatriciaTrieWritingHelper() {} - // Add a word to the dictionary. If the word already exists, update the probability. - bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, - const int *const wordCodePoints, const int codePointCount, const int probability, - bool *const outAddedNewUnigram); - - // Add a bigram relation from word0Pos to word1Pos. - bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, - bool *const outAddedNewBigram); - - // Remove a bigram relation from word0Pos to word1Pos. - bool removeBigramWords(const int word0Pos, const int word1Pos); - void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount); void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy); - // CAVEAT: This method must be called only from inner classes of - // DynamicPatriciaTrieGcEventListeners. - bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); - - // CAVEAT: This method must be called only from this class or inner classes of - // DynamicPatriciaTrieGcEventListeners. - bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, - const PtNodeParams *const originalPtNodeParams, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos); - private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); - static const int CHILDREN_POSITION_FIELD_SIZE; - BufferWithExtendableBuffer *const mBuffer; + const PtNodeReader *const mPtNodeReader; + PtNodeWriter *const mPtNodeWriter; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; const bool mNeedsToDecay; - bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int movedPos, const int bigramLinkedNodePos); - - bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool isBlacklisted, const bool isNotAWord, - const int parentPos, const int *const codePoints, const int codePointCount, - const int probability, const int childrenPos, const int originalBigramListPos, - const int originalShortcutListPos, int *const writingPos); - - bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const int parentPos, const int *const codePoints, const int codePointCount, - const int probability, int *const writingPos); - - bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, - const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); - - bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability, - bool *const outAddedNewUnigram); - - bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, - const int probability, const int *const codePoints, const int codePointCount); - - bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, - const int nodeCodePointCount, const int probability); - - bool reallocatePtNodeAndAddNewPtNodes( - const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, - const int probabilityOfNewPtNode, const int *const newNodeCodePoints, - const int newNodeCodePointCount); - bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount); - - int getUpdatedProbability(const int originalProbability, const int newProbability); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ |