diff options
Diffstat (limited to 'native/jni')
17 files changed, 450 insertions, 146 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk index a51fe3c03..bf188971e 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -71,14 +71,16 @@ LATIN_IME_CORE_SRC_FILES := \ header/header_policy.cpp \ header/header_reading_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ - utils/byte_array_utils.cpp \ - utils/format_utils.cpp \ dictionary_structure_with_buffer_policy_factory.cpp \ dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_utils.cpp \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp) \ + $(addprefix suggest/policyimpl/dictionary/utils/, \ + byte_array_utils.cpp \ + extendable_buffer.cpp \ + format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ scoring_params.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h new file mode 100644 index 000000000..513878e1d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H +#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" + +namespace latinime { + +/* + * This is a dynamic version of BigramListPolicy and supports an additional buffer. + */ +class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + DynamicBigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize, + const ExtendableBuffer *const additionalBuffer) + : mDictRoot(bigramsBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {} + + ~DynamicBigramListPolicy() {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, + int *const pos) const { + const bool usesAdditionalBuffer = *pos >= mBufSize; + const uint8_t *const buffer = (usesAdditionalBuffer) ? + mAdditionalBuffer->getBuffer() : mDictRoot; + if (usesAdditionalBuffer) { + *pos -= mBufSize; + } + const BigramListReadingUtils::BigramFlags flags = + BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos); + *outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer( + buffer, flags, pos); + if (usesAdditionalBuffer) { + *outBigramPos += mBufSize; + } + *outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags); + *outHasNext = BigramListReadingUtils::hasNext(flags); + if (usesAdditionalBuffer) { + *pos += mBufSize; + } + } + + void skipAllBigrams(int *const pos) const { + if (*pos >= mBufSize) { + *pos -= mBufSize; + BigramListReadingUtils::skipExistingBigrams(mAdditionalBuffer->getBuffer(), pos); + *pos += mBufSize; + } else { + BigramListReadingUtils::skipExistingBigrams(mDictRoot, pos); + } + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); + + const uint8_t *const mDictRoot; + const int mBufSize; + const ExtendableBuffer *const mAdditionalBuffer; +}; +} // namespace latinime +#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp index cc5252c43..434858d67 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -22,7 +22,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" -#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -31,7 +31,7 @@ namespace latinime { const int bufOffset, const int size, const bool isUpdatable) { // Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of // impl classes of DictionaryStructureWithBufferPolicy. - const MmapedBuffer *const mmapedBuffer = MmapedBuffer::openBuffer(path, pathLength, bufOffset, + const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, pathLength, bufOffset, size, isUpdatable); if (!mmapedBuffer) { return 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 77a85c86d..c427ebe2d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -19,34 +19,44 @@ #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - int pos = nodePos; - mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); + const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize; + const uint8_t *const dictBuf = + usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot; + int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos; + mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const int parentPos = - DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); + DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); + dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); } else { mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( - mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); + dictBuf, mFlags, MAX_WORD_LENGTH, &pos); } if (isTerminal()) { - mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); + mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); } else { mProbability = NOT_A_PROBABILITY; } if (hasChildren()) { mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - mDictRoot, mFlags, &pos); + dictBuf, mFlags, &pos); + if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { + mChildrenPos += mOriginalDictSize; + } } else { mChildrenPos = NOT_A_DICT_POS; } + if (usesAdditionalBuffer) { + pos += mOriginalDictSize; + } if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { mShortcutPos = pos; mShortcutsPolicy->skipAllShortcuts(&pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index e990809e8..2a636289e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -27,6 +27,7 @@ namespace latinime { class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; +class ExtendableBuffer; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved @@ -34,12 +35,14 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicPatriciaTrieNodeReader { public: - DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, + DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize, + const ExtendableBuffer *const extendableBuffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) - : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), + : mDictRoot(dictRoot), mOriginalDictSize(originalDictSize), + mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), - mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} @@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader { // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; + const int mOriginalDictSize; + const ExtendableBuffer *const mExtendableBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; int mNodePos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 3000860a3..b244dd0b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -33,13 +33,13 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; do { - const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( + const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { @@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); @@ -124,14 +124,14 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); - while (currentLength <= length) { + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + while (currentLength < length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; int totalChildCount = 0; do { - const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( + const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &pos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { @@ -144,13 +144,15 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in for (int i = 0; i < childCount; i++) { nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(pos, MAX_WORD_LENGTH, mergedNodeCodePoints); - if (nodeReader.isDeleted() || nodeReader.getCodePointCount() <= 0) { + const int nodeCodePointCount = nodeReader.getCodePointCount(); + if (nodeReader.isDeleted() || nodeCodePointCount <= 0 + || currentLength + nodeCodePointCount > length) { // Skip deleted or empty node. pos = nodeReader.getSiblingNodePos(); continue; } bool matched = true; - for (int j = 0; j < nodeReader.getCodePointCount(); ++j) { + for (int j = 0; j < nodeCodePointCount; ++j) { if (mergedNodeCodePoints[j] != searchCodePoints[currentLength + j]) { // Different code point is found. matched = false; @@ -158,7 +160,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in } } if (matched) { - currentLength += nodeReader.getCodePointCount(); + currentLength += nodeCodePointCount; if (length == currentLength) { // Terminal position is found. return nodeReader.getNodePos(); @@ -177,7 +179,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in if (foundMatchedNode) { break; } - // If the matched node is not found in the current node group, try to follow the + // If the matched node is not found in the current PtNode array, try to follow the // forward link. pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( mDictRoot, pos); @@ -196,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; @@ -209,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; @@ -222,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 708967d7b..73b963212 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -21,10 +21,11 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -33,10 +34,12 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - DynamicPatriciaTriePolicy(const MmapedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), + DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) + : mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), - mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} + mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer), + mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -86,12 +89,15 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - const MmapedBuffer *const mBuffer; + const MmappedBuffer *const mBuffer; + const ExtendableBuffer mExtendableBuffer; const HeaderPolicy mHeaderPolicy; // TODO: Consolidate mDictRoot. + // CAVEAT!: Be careful about array out of bound access with mDictRoot const uint8_t *const mDictRoot; - const BigramListPolicy mBigramListPolicy; - const ShortcutListPolicy mShortcutListPolicy; + const int mOriginalDictSize; + const DynamicBigramListPolicy mBigramListPolicy; + const DynamicShortcutListPolicy mShortcutListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 15eb0674d..adcf2dbdf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -30,7 +30,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); - const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( + const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); @@ -40,15 +40,15 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, // it is possible to check for this with advantageous complexity. For each node, we search -// for groups with children and compare the children position with the position we look for. +// for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children -// of the previous group. The only tricky part is the fact that if we arrive at the end of a -// node with the last group's children position still less than what we are searching for, we -// must descend the last group's children (for example, if the word we are searching for starts -// with a z, it's the last group of the root node, so all children addresses will be smaller +// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a +// PtNode array with the last PtNode's children position still less than what we are searching for, +// we must descend the last PtNode's children (for example, if the word we are searching for starts +// with a z, it's the last PtNode of the root array, so all children addresses will be smaller // than the position we look for, and we have to descend the z node). /* Parameters : - * nodePos: the byte position of the terminal chargroup of the word we are searching for (this is + * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. * outUnigramProbability: a pointer to an int to write the probability into. @@ -60,18 +60,18 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int *const outUnigramProbability) const { int pos = getRootPosition(); int wordPos = 0; - // One iteration of the outer loop iterates through nodes. As stated above, we will only - // traverse nodes that are actually a part of the terminal we are searching, so each time + // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will + // only traverse nodes that are actually a part of the terminal we are searching, so each time // we enter this loop we are one depth level further than last time. // The only reason we count nodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { - int lastCandidateGroupPos = 0; - // Let's loop through char groups in this node searching for either the terminal + int lastCandidatePtNodePos = 0; + // Let's loop through PtNodes in this PtNode array searching for either the terminal // or one of its ascendants. - for (int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( - mDictRoot, &pos); charGroupCount > 0; --charGroupCount) { + for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) { const int startPos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); @@ -98,7 +98,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( &pos); return ++wordPos; } - // We need to skip past this char group, so skip any remaining code points after the + // We need to skip past this PtNode, so skip any remaining code points after the // first and possibly the probability. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); @@ -106,8 +106,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( if (PatriciaTrieReadingUtils::isTerminal(flags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } - // The fact that this group has children is very important. Since we already know - // that this group does not match, if it has no children we know it is irrelevant + // The fact that this PtNode has children is very important. Since we already know + // that this PtNode does not match, if it has no children we know it is irrelevant // to what we are searching for. const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags); // We will write in `found' whether we have passed the children position we are @@ -122,45 +122,45 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, ¤tPos); if (childrenPos > nodePos) { // If the children pos is greater than the position, it means the previous - // chargroup, which position is stored in lastCandidateGroupPos, was the right + // PtNode, which position is stored in lastCandidatePtNodePos, was the right // one. found = true; - } else if (1 >= charGroupCount) { - // However if we are on the LAST group of this node, and we have NOT shot the - // position we should descend THIS node. So we trick the lastCandidateGroupPos - // so that we will descend this node, not the previous one. - lastCandidateGroupPos = startPos; + } else if (1 >= ptNodeCount) { + // However if we are on the LAST PtNode of this array, and we have NOT shot the + // position we should descend THIS node. So we trick the lastCandidatePtNodePos + // so that we will descend this PtNode, not the previous one. + lastCandidatePtNodePos = startPos; found = true; } else { // Else, we should continue looking. found = false; } } else { - // Even if we don't have children here, we could still be on the last group of this - // node. If this is the case, we should descend the last group that had children, - // and their position is already in lastCandidateGroup. - found = (1 >= charGroupCount); + // Even if we don't have children here, we could still be on the last PtNode of / + // this array. If this is the case, we should descend the last PtNode that had + // children, and their position is already in lastCandidatePtNodePos. + found = (1 >= ptNodeCount); } if (found) { - // Okay, we found the group we should descend. Its position is in - // the lastCandidateGroupPos variable, so we just re-read it. - if (0 != lastCandidateGroupPos) { + // Okay, we found the PtNode we should descend. Its position is in + // the lastCandidatePtNodePos variable, so we just re-read it. + if (0 != lastCandidatePtNodePos) { const PatriciaTrieReadingUtils::NodeFlags lastFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition( - mDictRoot, &lastCandidateGroupPos); + mDictRoot, &lastCandidatePtNodePos); const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( - mDictRoot, &lastCandidateGroupPos); - // We copy all the characters in this group to the buffer + mDictRoot, &lastCandidatePtNodePos); + // We copy all the characters in this PtNode to the buffer outCodePoints[wordPos] = lastChar; if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) { int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( - mDictRoot, &lastCandidateGroupPos); + mDictRoot, &lastCandidatePtNodePos); int charCount = maxCodePointCount; while (-1 != nextChar && --charCount > 0) { outCodePoints[++wordPos] = nextChar; nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( - mDictRoot, &lastCandidateGroupPos); + mDictRoot, &lastCandidatePtNodePos); } } ++wordPos; @@ -168,19 +168,19 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // it's there, read pos, and break to resume the search at pos. if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, - &lastCandidateGroupPos); + &lastCandidatePtNodePos); } pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - mDictRoot, lastFlags, &lastCandidateGroupPos); + mDictRoot, lastFlags, &lastCandidatePtNodePos); break; } else { // Here is a little tricky part: we come here if we found out that all children - // addresses in this group are bigger than the address we are searching for. + // addresses in this PtNode are bigger than the address we are searching for. // Should we conclude the word is not in the dictionary? No! It could still be - // one of the remaining chargroups in this node, so we have to keep looking in - // this node until we find it (or we realize it's not there either, in which - // case it's actually not in the dictionary). Pass the end of this group, ready - // to start the next one. + // one of the remaining PtNodes in this array, so we have to keep looking in + // this array until we find it (or we realize it's not there either, in which + // case it's actually not in the dictionary). Pass the end of this PtNode, + // ready to start the next one. if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos); @@ -195,9 +195,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } else { // If we did not find it, we should record the last children address for the next // iteration. - if (hasChildren) lastCandidateGroupPos = startPos; - // Now skip the end of this group (children pos and the attributes if any) so that - // our pos is after the end of this char group, at the start of the next one. + if (hasChildren) lastCandidatePtNodePos = startPos; + // Now skip the end of this PtNode (children pos and the attributes if any) so that + // our pos is after the end of this PtNode, at the start of the next one. if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos); @@ -212,7 +212,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } } - // If we have looked through all the chargroups and found no match, the nodePos is + // If we have looked through all the PtNodes and found no match, the nodePos is // not the position of a terminal in this dictionary. return 0; } @@ -228,24 +228,24 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). if (wordPos >= length) return NOT_A_VALID_WORD_POS; - int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(mDictRoot, + int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { - // If there are no more character groups in this node, it means we could not + // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. - if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS; - const int charGroupPos = pos; + if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS; + const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); if (character == wChar) { - // This is the correct node. Only one character group may start with the same - // char within a node, so either we found our match in this node, or there is + // This is the correct PtNode. Only one PtNode may start with the same char within + // a PtNode array, so either we found our match in this array, or there is // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the - // characters in this character group indeed does match. + // characters in this PtNode indeed does match. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); @@ -253,7 +253,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is - // not in the dictionary (by virtue of this chargroup being the only one to + // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). if (wordPos >= length) return NOT_A_VALID_WORD_POS; if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS; @@ -268,7 +268,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, ++wordPos; if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (wordPos == length) { - return charGroupPos; + return ptNodePos; } PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } @@ -282,7 +282,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, flags, &pos); break; } else { - // This chargroup does not match, so skip the remaining part and go to the next. + // This PtNode does not match, so skip the remaining part and go to the next. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); @@ -301,7 +301,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, mBigramListPolicy.skipAllBigrams(&pos); } } - --charGroupCount; + --ptNodeCount; } } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 0d85050f3..d0567fd85 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -24,7 +24,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" -#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -33,7 +33,7 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - PatriciaTriePolicy(const MmapedBuffer *const buffer) + PatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} @@ -97,7 +97,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); - const MmapedBuffer *const mBuffer; + const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 003b9435b..576a158bc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -23,15 +23,15 @@ namespace latinime { typedef PatriciaTrieReadingUtils PtReadingUtils; -const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0; -const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; -const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; -const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; -const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; +const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0; // Flag for single/multiple char group const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; -// Flag for terminal groups +// Flag for terminal PtNodes const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; // Flag for shortcut targets presence const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; @@ -46,14 +46,14 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; const uint8_t *const buffer, const NodeFlags flags, int *const pos) { const int base = *pos; int offset = 0; - switch (MASK_GROUP_ADDRESS_TYPE & flags) { - case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + switch (MASK_CHILDREN_POSITION_TYPE & flags) { + case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE: offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); break; - case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES: offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); break; - case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES: offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); break; default: diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h index 9f2fc2059..f76c38751 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -28,7 +28,7 @@ class PatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; - static AK_FORCE_INLINE int getGroupCountAndAdvancePosition( + static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); if (firstByte < 0x80) { @@ -116,17 +116,17 @@ class PatriciaTrieReadingUtils { } static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { - return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags); + return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); } private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); - static const NodeFlags MASK_GROUP_ADDRESS_TYPE; - static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS; - static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE; - static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES; - static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES; + static const NodeFlags MASK_CHILDREN_POSITION_TYPE; + static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; + static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; + static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; + static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; static const NodeFlags FLAG_HAS_MULTIPLE_CHARS; static const NodeFlags FLAG_IS_TERMINAL; diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h new file mode 100644 index 000000000..fdaf18f7c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H +#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" + +namespace latinime { + +/* + * This is a dynamic version of ShortcutListPolicy and supports an additional buffer. + */ +class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy { + public: + DynamicShortcutListPolicy(const uint8_t *const shortcutBuf, const int bufSize, + const ExtendableBuffer *const additionalBuffer) + : mShortcutsBuf(shortcutBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {} + + ~DynamicShortcutListPolicy() {} + + int getStartPos(const int pos) const { + if (pos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize(); + } + + void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const { + const bool usesAdditionalBuffer = *pos >= mBufSize; + const uint8_t *const buffer = usesAdditionalBuffer + ? mAdditionalBuffer->getBuffer() : mShortcutsBuf; + if (usesAdditionalBuffer) { + *pos -= mBufSize; + } + const ShortcutListReadingUtils::ShortcutFlags flags = + ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos); + if (outHasNext) { + *outHasNext = ShortcutListReadingUtils::hasNext(flags); + } + if (outIsWhitelist) { + *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags); + } + if (outCodePoint) { + *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget( + buffer, maxCodePointCount, outCodePoint, pos); + } + if (usesAdditionalBuffer) { + *pos += mBufSize; + } + } + + void skipAllShortcuts(int *const pos) const { + if (*pos >= mBufSize) { + *pos -= mBufSize; + const int shortcutListSize = ShortcutListReadingUtils + ::getShortcutListSizeAndForwardPointer(mAdditionalBuffer->getBuffer(), pos); + *pos += mBufSize + shortcutListSize; + } else { + const int shortcutListSize = ShortcutListReadingUtils + ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos); + *pos += shortcutListSize; + } + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy); + + const uint8_t *const mShortcutsBuf; + const int mBufSize; + const ExtendableBuffer *const mAdditionalBuffer; +}; +} // namespace latinime +#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h index b5bb964d5..5f4f240f5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h @@ -50,6 +50,10 @@ class ShortcutListReadingUtils { - SHORTCUT_LIST_SIZE_FIELD_SIZE; } + static AK_FORCE_INLINE int getShortcutListSizeFieldSize() { + return SHORTCUT_LIST_SIZE_FIELD_SIZE; + } + static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) { const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos); *pos += shortcutListSize; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp new file mode 100644 index 000000000..e55cc2410 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" + +namespace latinime { + +const size_t ExtendableBuffer::INITIAL_BUFFER_SIZE = 16 * 1024; +const size_t ExtendableBuffer::MAX_BUFFER_SIZE = 1024 * 1024; +const size_t ExtendableBuffer::EXTEND_BUFFER_SIZE_STEP = 16 * 1024; + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h new file mode 100644 index 000000000..5c75027d2 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_EXTENDABLE_BUFFER_H +#define LATINIME_EXTENDABLE_BUFFER_H + +#include <cstddef> +#include <stdint.h> +#include <vector> + +#include "defines.h" + +namespace latinime { + +// This is used as a buffer that can be extended for updatable dictionaries. +class ExtendableBuffer { + public: + ExtendableBuffer() : mBuffer(INITIAL_BUFFER_SIZE), mUsedSize(0) {} + + AK_FORCE_INLINE const uint8_t *getBuffer() const { + return &mBuffer[0]; + } + + // Return if the buffer is successfully extended or not. + AK_FORCE_INLINE bool extendBuffer() { + if (mBuffer.size() + EXTEND_BUFFER_SIZE_STEP > MAX_BUFFER_SIZE) { + return false; + } + mBuffer.resize(mBuffer.size() + EXTEND_BUFFER_SIZE_STEP); + return true; + } + + AK_FORCE_INLINE int getAllocatedSize() const { + return mBuffer.size(); + } + + AK_FORCE_INLINE int getUsedSize() const { + return mUsedSize; + } + + AK_FORCE_INLINE void clear() { + mUsedSize = 0; + mBuffer.clear(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ExtendableBuffer); + + static const size_t INITIAL_BUFFER_SIZE; + static const size_t MAX_BUFFER_SIZE; + static const size_t EXTEND_BUFFER_SIZE_STEP; + + std::vector<uint8_t> mBuffer; + int mUsedSize; +}; +} +#endif /* LATINIME_MMAPED_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmaped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 08add03c6..6febd7832 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmaped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LATINIME_MMAPED_BUFFER_H -#define LATINIME_MMAPED_BUFFER_H +#ifndef LATINIME_MMAPPED_BUFFER_H +#define LATINIME_MMAPPED_BUFFER_H #include <cerrno> #include <fcntl.h> @@ -27,39 +27,40 @@ namespace latinime { -class MmapedBuffer { +class MmappedBuffer { public: - static MmapedBuffer* openBuffer(const char *const path, const int pathLength, - const int bufOffset, const int size, const bool isUpdatable) { + static MmappedBuffer* openBuffer(const char *const path, const int pathLength, + const int bufferOffset, const int bufferSize, const bool isUpdatable) { const int openMode = isUpdatable ? O_RDWR : O_RDONLY; - const int fd = open(path, openMode); - if (fd < 0) { + const int mmapFd = open(path, openMode); + if (mmapFd < 0) { AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno); return 0; } const int pagesize = getpagesize(); - const int offset = bufOffset % pagesize; - int adjOffset = bufOffset - offset; - int adjSize = size + offset; + const int offset = bufferOffset % pagesize; + int alignedOffset = bufferOffset - offset; + int alignedSize = bufferSize + offset; const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ; - void *const mmapedBuffer = mmap(0, adjSize, protMode, MAP_PRIVATE, fd, adjOffset); - if (mmapedBuffer == MAP_FAILED) { + void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd, + alignedOffset); + if (mmappedBuffer == MAP_FAILED) { AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); - close(fd); + close(mmapFd); return 0; } - uint8_t *const buffer = static_cast<uint8_t *>(mmapedBuffer) + offset; + uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset; if (!buffer) { AKLOGE("DICT: buffer is null"); - close(fd); + close(mmapFd); return 0; } - return new MmapedBuffer(buffer, adjSize, fd, offset, isUpdatable); + return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd, + isUpdatable); } - ~MmapedBuffer() { - int ret = munmap(static_cast<void *>(mBuffer - mBufferOffset), - mBufferSize + mBufferOffset); + ~MmappedBuffer() { + int ret = munmap(mMmappedBuffer, mAlignedSize); if (ret != 0) { AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); } @@ -82,18 +83,20 @@ class MmapedBuffer { } private: - AK_FORCE_INLINE MmapedBuffer(uint8_t *const buffer, const int bufferSize, const int mmapFd, - const int bufferOffset, const bool isUpdatable) - : mBuffer(buffer), mBufferSize(bufferSize), mMmapFd(mmapFd), - mBufferOffset(bufferOffset), mIsUpdatable(isUpdatable) {} + AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize, + void *const mmappedBuffer, const int alignedSize, const int mmapFd, + const bool isUpdatable) + : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer), + mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {} - DISALLOW_IMPLICIT_CONSTRUCTORS(MmapedBuffer); + DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer); uint8_t *const mBuffer; const int mBufferSize; + void *const mMmappedBuffer; + const int mAlignedSize; const int mMmapFd; - const int mBufferOffset; const bool mIsUpdatable; }; } -#endif /* LATINIME_MMAPED_BUFFER_H */ +#endif /* LATINIME_MMAPPED_BUFFER_H */ diff --git a/native/jni/src/utils/autocorrection_threshold_utils.cpp b/native/jni/src/utils/autocorrection_threshold_utils.cpp index 3406e0f8e..1f8ee0814 100644 --- a/native/jni/src/utils/autocorrection_threshold_utils.cpp +++ b/native/jni/src/utils/autocorrection_threshold_utils.cpp @@ -83,9 +83,12 @@ const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2; return 0.0f; } + if (score <= 0 || distance >= afterLength) { + // normalizedScore must be 0.0f (the minimum value) if the score is less than or equal to 0, + // or if the edit distance is larger than or equal to afterLength. + return 0.0f; + } // add a weight based on edit distance. - // distance <= max(afterLength, beforeLength) == afterLength, - // so, 0 <= distance / afterLength <= 1 const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); // TODO: Revise the following logic thoroughly by referring to... |