diff options
6 files changed, 134 insertions, 34 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 405628b30..5674cb48e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -34,7 +34,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const int parentPos = DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); - mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; + mParentPos = (parentPos != 0) ? nodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 6d1bd02c2..2c28da9af 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -47,14 +47,16 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( const int nodeCodePointCount = nodeReader->getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { const int nextIndex = matchedCodePointCount + j; - if (nextIndex >= codePointCount) { - // TODO: split current node after j - 1, create child and make this terminal. - return false; - } - if (!readingHelper->isMatchedCodePoint(j, + if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, wordCodePoints[matchedCodePointCount + j])) { - // TODO: split current node after j - 1 and create two children. - return false; + if (ENABLE_DYNAMIC_UPDATE) { + return reallocatePtNodeAndAddNewPtNodes(nodeReader, + readingHelper->getMergedNodeCodePoints(), j, probability, + wordCodePoints + matchedCodePointCount, + codePointCount - matchedCodePointCount); + } else { + return false; + } } } // All characters are matched. @@ -136,20 +138,22 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( &writingPos)) { return false; } - // Update moved position, which is stored in the parent position field. - if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition( - mBuffer, movedPos, &writingPos)) { + // Update moved position, which is stored in the parent offset field. + const int movedPosOffset = movedPos - originalNode->getNodePos(); + if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( + mBuffer, movedPosOffset, &writingPos)) { return false; } return true; } -// Write new node at writingPos. -bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted, +// Write new PtNode at writingPos. +bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, const int originalShortcutListPos, int *const writingPos) { + const int nodePos = *writingPos; // Create node flags and write them. const PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, @@ -160,9 +164,10 @@ bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlackliste writingPos)) { return false; } - // Write parent position - if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition(mBuffer, parentPos, - writingPos)) { + // Calculate a parent offset and write the offset. + const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS; + if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBuffer, + parentOffset, writingPos)) { return false; } // Write code points @@ -200,6 +205,25 @@ bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlackliste return true; } +bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos) { + return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */, + parentPos, codePoints, codePointCount, probability, + NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, + NOT_A_DICT_POS /* originalShortcutPos */, writingPos); +} + +bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( + const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos) { + return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(), + originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, + originalNode->getChildrenPos(), originalNode->getBigramsPos(), + originalNode->getShortcutPos(), writingPos); +} + bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos) { @@ -228,10 +252,8 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { return false; } - if (!writeNodeToBuffer(originalPtNode->isBlacklisted(), originalPtNode->isNotAWord(), - originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), - probability, originalPtNode->getChildrenPos(), originalPtNode->getBigramsPos(), - originalPtNode->getShortcutPos(), &movedPos)) { + if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(), + codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) { return false; } } @@ -259,9 +281,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( 1 /* arraySize */, &writingPos)) { return false; } - if (!writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPtNodePos, - nodeCodePoints, nodeCodePointCount, probability, NOT_A_DICT_POS /* childrenPos */, - NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */, + if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability, &writingPos)) { return false; } @@ -272,4 +292,69 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( return true; } +// Returns whether the dictionary updating was succeeded or not. +bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( + const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, + const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const int probabilityOfNewPtNode, const int *const newNodeCodePoints, + const int newNodeCodePointCount) { + // When addsExtraChild is true, split the reallocating PtNode and add new child. + // Reallocating PtNode: abcde, newNode: abcxy. + // abc (1st, not terminal) __ de (2nd) + // \_ xy (extra child, terminal) + // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. + // Reallocating PtNode: abcde, newNode: abc. + // abc (1st, terminal) __ de (2nd) + const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; + const int firstPtNodePos = mBuffer->getTailPosition(); + if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) { + return false; + } + int writingPos = firstPtNodePos; + // Write the 1st part of the reallocating node. The children position will be updated later + // with actual children position. + const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; + if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints, + overlappingCodePointCount, newProbability, &writingPos)) { + return false; + } + const int actualChildrenPos = writingPos; + // Create new children PtNode array. + const size_t newPtNodeCount = addsExtraChild ? 2 : 1; + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, + newPtNodeCount, &writingPos)) { + return false; + } + // Write the 2nd part of the reallocating node. + if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, + reallocatingPtNode->getNodePos(), + reallocatingPtNodeCodePoints + overlappingCodePointCount, + reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, + reallocatingPtNode->getProbability(), &writingPos)) { + return false; + } + if (addsExtraChild) { + if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(), + newNodeCodePoints + overlappingCodePointCount, + newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, + &writingPos)) { + return false; + } + } + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + // Load node info. Information of the 1st part will be fetched. + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos); + // Update children position. + int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + actualChildrenPos, &childrenPosFieldPos)) { + return false; + } + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 16b84bac3..ebde24d4d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -57,11 +57,19 @@ class DynamicPatriciaTrieWritingHelper { bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos); - bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - const int childrenPos, const int originalBigramListPos, + bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, + const int parentPos, const int *const codePoints, const int codePointCount, + const int probability, const int childrenPos, const int originalBigramListPos, const int originalShortcutListPos, int *const writingPos); + bool writePtNodeToBuffer(const int parentPos, const int *const codePoints, + const int codePointCount, const int probability, int *const writingPos); + + bool writePtNodeToBufferByCopyingPtNodeInfo( + const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos); + bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); @@ -74,6 +82,12 @@ class DynamicPatriciaTrieWritingHelper { bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability); + + bool reallocatePtNodeAndAddNewPtNodes( + const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, + const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const int probabilityOfNewPtNode, const int *const newNodeCodePoints, + const int newNodeCodePointCount); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index 4187504b4..b261e594d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -68,11 +68,11 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; return buffer->writeUintAndAdvancePosition(nodeFlags, NODE_FLAG_FIELD_SIZE, nodeFlagsFieldPos); } -/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition( - BufferWithExtendableBuffer *const buffer, const int parentPosition, +// Note that parentOffset is offset from node's head position. +/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( + BufferWithExtendableBuffer *const buffer, const int parentOffset, int *const parentPosFieldPos) { - // Note that parentPosition is offset from node's head position. - int offset = (parentPosition != NOT_A_DICT_POS) ? parentPosition : 0; + int offset = (parentOffset != NOT_A_DICT_POS) ? parentOffset : 0; return writeDictOffset(buffer, offset, parentPosFieldPos); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index 801042ddf..183ede444 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -39,7 +39,7 @@ class DynamicPatriciaTrieWritingUtils { const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos); - static bool writeParentPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer, + static bool writeParentOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer, const int parentPosition, int *const parentPosFieldPos); static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 6326754c2..dfdaebd18 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -66,16 +66,17 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) { if (isInAdditionalBuffer(pos)) { - if (pos == mUsedAdditionalBufferSize) { + const int tailPosition = getTailPosition(); + if (pos == tailPosition) { // Append data to the tail. - if (pos + size > static_cast<int>(mAdditionalBuffer.size())) { + if (pos + size > static_cast<int>(mAdditionalBuffer.size()) + mOriginalBufferSize) { // Need to extend buffer. if (!extendBuffer()) { return false; } } mUsedAdditionalBufferSize += size; - } else if (pos + size >= mUsedAdditionalBufferSize) { + } else if (pos + size >= tailPosition) { // The access will beyond the tail of used region. return false; } |