aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni')
-rw-r--r--native/jni/Android.mk6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h80
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp24
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h11
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp40
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h24
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp112
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h94
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp25
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h70
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h (renamed from native/jni/src/suggest/policyimpl/dictionary/utils/mmaped_buffer.h)55
-rw-r--r--native/jni/src/utils/autocorrection_threshold_utils.cpp7
17 files changed, 450 insertions, 146 deletions
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index a51fe3c03..bf188971e 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -71,14 +71,16 @@ LATIN_IME_CORE_SRC_FILES := \
header/header_policy.cpp \
header/header_reading_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \
- utils/byte_array_utils.cpp \
- utils/format_utils.cpp \
dictionary_structure_with_buffer_policy_factory.cpp \
dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \
dynamic_patricia_trie_reading_utils.cpp \
patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
+ $(addprefix suggest/policyimpl/dictionary/utils/, \
+ byte_array_utils.cpp \
+ extendable_buffer.cpp \
+ format_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
new file mode 100644
index 000000000..513878e1d
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
+
+namespace latinime {
+
+/*
+ * This is a dynamic version of BigramListPolicy and supports an additional buffer.
+ */
+class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
+ public:
+ DynamicBigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize,
+ const ExtendableBuffer *const additionalBuffer)
+ : mDictRoot(bigramsBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {}
+
+ ~DynamicBigramListPolicy() {}
+
+ void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
+ int *const pos) const {
+ const bool usesAdditionalBuffer = *pos >= mBufSize;
+ const uint8_t *const buffer = (usesAdditionalBuffer) ?
+ mAdditionalBuffer->getBuffer() : mDictRoot;
+ if (usesAdditionalBuffer) {
+ *pos -= mBufSize;
+ }
+ const BigramListReadingUtils::BigramFlags flags =
+ BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
+ *outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, pos);
+ if (usesAdditionalBuffer) {
+ *outBigramPos += mBufSize;
+ }
+ *outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
+ *outHasNext = BigramListReadingUtils::hasNext(flags);
+ if (usesAdditionalBuffer) {
+ *pos += mBufSize;
+ }
+ }
+
+ void skipAllBigrams(int *const pos) const {
+ if (*pos >= mBufSize) {
+ *pos -= mBufSize;
+ BigramListReadingUtils::skipExistingBigrams(mAdditionalBuffer->getBuffer(), pos);
+ *pos += mBufSize;
+ } else {
+ BigramListReadingUtils::skipExistingBigrams(mDictRoot, pos);
+ }
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
+
+ const uint8_t *const mDictRoot;
+ const int mBufSize;
+ const ExtendableBuffer *const mAdditionalBuffer;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
index cc5252c43..434858d67 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -22,7 +22,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
-#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -31,7 +31,7 @@ namespace latinime {
const int bufOffset, const int size, const bool isUpdatable) {
// Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
// impl classes of DictionaryStructureWithBufferPolicy.
- const MmapedBuffer *const mmapedBuffer = MmapedBuffer::openBuffer(path, pathLength, bufOffset,
+ const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, pathLength, bufOffset,
size, isUpdatable);
if (!mmapedBuffer) {
return 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index 77a85c86d..c427ebe2d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -19,34 +19,44 @@
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
- int pos = nodePos;
- mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
+ const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize;
+ const uint8_t *const dictBuf =
+ usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot;
+ int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos;
+ mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPos =
- DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos);
+ DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS;
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
- mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos);
+ dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
} else {
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
- mDictRoot, mFlags, MAX_WORD_LENGTH, &pos);
+ dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
}
if (isTerminal()) {
- mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
+ mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
} else {
mProbability = NOT_A_PROBABILITY;
}
if (hasChildren()) {
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- mDictRoot, mFlags, &pos);
+ dictBuf, mFlags, &pos);
+ if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
+ mChildrenPos += mOriginalDictSize;
+ }
} else {
mChildrenPos = NOT_A_DICT_POS;
}
+ if (usesAdditionalBuffer) {
+ pos += mOriginalDictSize;
+ }
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
mShortcutPos = pos;
mShortcutsPolicy->skipAllShortcuts(&pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
index e990809e8..2a636289e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -27,6 +27,7 @@ namespace latinime {
class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy;
+class ExtendableBuffer;
/*
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
@@ -34,12 +35,14 @@ class DictionaryShortcutsStructurePolicy;
*/
class DynamicPatriciaTrieNodeReader {
public:
- DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot,
+ DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize,
+ const ExtendableBuffer *const extendableBuffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
- : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy),
+ : mDictRoot(dictRoot), mOriginalDictSize(originalDictSize),
+ mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
@@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader {
// TODO: Consolidate mDictRoot.
const uint8_t *const mDictRoot;
+ const int mOriginalDictSize;
+ const ExtendableBuffer *const mExtendableBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mNodePos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 3000860a3..b244dd0b5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -33,13 +33,13 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
if (!dicNode->hasChildren()) {
return;
}
- DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
- getShortcutsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
int mergedNodeCodePoints[MAX_WORD_LENGTH];
int nextPos = dicNode->getChildrenPos();
int totalChildCount = 0;
do {
- const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
+ const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
mDictRoot, &nextPos);
totalChildCount += childCount;
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
@@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
int mergedNodeCodePoints[maxCodePointCount];
int codePointCount = 0;
- DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
- getShortcutsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read terminal node and get its probability.
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
mergedNodeCodePoints);
@@ -124,14 +124,14 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
int mergedNodeCodePoints[MAX_WORD_LENGTH];
int currentLength = 0;
int pos = getRootPosition();
- DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
- getShortcutsStructurePolicy());
- while (currentLength <= length) {
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
+ while (currentLength < length) {
// When foundMatchedNode becomes true, currentLength is increased at least once.
bool foundMatchedNode = false;
int totalChildCount = 0;
do {
- const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
+ const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
mDictRoot, &pos);
totalChildCount += childCount;
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
@@ -144,13 +144,15 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
for (int i = 0; i < childCount; i++) {
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(pos, MAX_WORD_LENGTH,
mergedNodeCodePoints);
- if (nodeReader.isDeleted() || nodeReader.getCodePointCount() <= 0) {
+ const int nodeCodePointCount = nodeReader.getCodePointCount();
+ if (nodeReader.isDeleted() || nodeCodePointCount <= 0
+ || currentLength + nodeCodePointCount > length) {
// Skip deleted or empty node.
pos = nodeReader.getSiblingNodePos();
continue;
}
bool matched = true;
- for (int j = 0; j < nodeReader.getCodePointCount(); ++j) {
+ for (int j = 0; j < nodeCodePointCount; ++j) {
if (mergedNodeCodePoints[j] != searchCodePoints[currentLength + j]) {
// Different code point is found.
matched = false;
@@ -158,7 +160,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
}
}
if (matched) {
- currentLength += nodeReader.getCodePointCount();
+ currentLength += nodeCodePointCount;
if (length == currentLength) {
// Terminal position is found.
return nodeReader.getNodePos();
@@ -177,7 +179,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
if (foundMatchedNode) {
break;
}
- // If the matched node is not found in the current node group, try to follow the
+ // If the matched node is not found in the current PtNode array, try to follow the
// forward link.
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
mDictRoot, pos);
@@ -196,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
- DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
- getShortcutsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
@@ -209,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
- getShortcutsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
@@ -222,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
- DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
- getShortcutsStructurePolicy());
+ DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
+ getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 708967d7b..73b963212 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -21,10 +21,11 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h"
+#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,10 +34,12 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- DynamicPatriciaTriePolicy(const MmapedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
+ DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
+ : mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
- mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
+ mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer),
+ mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
@@ -86,12 +89,15 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
- const MmapedBuffer *const mBuffer;
+ const MmappedBuffer *const mBuffer;
+ const ExtendableBuffer mExtendableBuffer;
const HeaderPolicy mHeaderPolicy;
// TODO: Consolidate mDictRoot.
+ // CAVEAT!: Be careful about array out of bound access with mDictRoot
const uint8_t *const mDictRoot;
- const BigramListPolicy mBigramListPolicy;
- const ShortcutListPolicy mShortcutListPolicy;
+ const int mOriginalDictSize;
+ const DynamicBigramListPolicy mBigramListPolicy;
+ const DynamicShortcutListPolicy mShortcutListPolicy;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index 15eb0674d..adcf2dbdf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -30,7 +30,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
return;
}
int nextPos = dicNode->getChildrenPos();
- const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
+ const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
mDictRoot, &nextPos);
for (int i = 0; i < childCount; i++) {
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
@@ -40,15 +40,15 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
// it is possible to check for this with advantageous complexity. For each node, we search
-// for groups with children and compare the children position with the position we look for.
+// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
-// of the previous group. The only tricky part is the fact that if we arrive at the end of a
-// node with the last group's children position still less than what we are searching for, we
-// must descend the last group's children (for example, if the word we are searching for starts
-// with a z, it's the last group of the root node, so all children addresses will be smaller
+// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
+// PtNode array with the last PtNode's children position still less than what we are searching for,
+// we must descend the last PtNode's children (for example, if the word we are searching for starts
+// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z node).
/* Parameters :
- * nodePos: the byte position of the terminal chargroup of the word we are searching for (this is
+ * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
* outUnigramProbability: a pointer to an int to write the probability into.
@@ -60,18 +60,18 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int *const outUnigramProbability) const {
int pos = getRootPosition();
int wordPos = 0;
- // One iteration of the outer loop iterates through nodes. As stated above, we will only
- // traverse nodes that are actually a part of the terminal we are searching, so each time
+ // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
+ // only traverse nodes that are actually a part of the terminal we are searching, so each time
// we enter this loop we are one depth level further than last time.
// The only reason we count nodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
- int lastCandidateGroupPos = 0;
- // Let's loop through char groups in this node searching for either the terminal
+ int lastCandidatePtNodePos = 0;
+ // Let's loop through PtNodes in this PtNode array searching for either the terminal
// or one of its ascendants.
- for (int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
- mDictRoot, &pos); charGroupCount > 0; --charGroupCount) {
+ for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
const int startPos = pos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
@@ -98,7 +98,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
&pos);
return ++wordPos;
}
- // We need to skip past this char group, so skip any remaining code points after the
+ // We need to skip past this PtNode, so skip any remaining code points after the
// first and possibly the probability.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
@@ -106,8 +106,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
- // The fact that this group has children is very important. Since we already know
- // that this group does not match, if it has no children we know it is irrelevant
+ // The fact that this PtNode has children is very important. Since we already know
+ // that this PtNode does not match, if it has no children we know it is irrelevant
// to what we are searching for.
const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
// We will write in `found' whether we have passed the children position we are
@@ -122,45 +122,45 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
if (childrenPos > nodePos) {
// If the children pos is greater than the position, it means the previous
- // chargroup, which position is stored in lastCandidateGroupPos, was the right
+ // PtNode, which position is stored in lastCandidatePtNodePos, was the right
// one.
found = true;
- } else if (1 >= charGroupCount) {
- // However if we are on the LAST group of this node, and we have NOT shot the
- // position we should descend THIS node. So we trick the lastCandidateGroupPos
- // so that we will descend this node, not the previous one.
- lastCandidateGroupPos = startPos;
+ } else if (1 >= ptNodeCount) {
+ // However if we are on the LAST PtNode of this array, and we have NOT shot the
+ // position we should descend THIS node. So we trick the lastCandidatePtNodePos
+ // so that we will descend this PtNode, not the previous one.
+ lastCandidatePtNodePos = startPos;
found = true;
} else {
// Else, we should continue looking.
found = false;
}
} else {
- // Even if we don't have children here, we could still be on the last group of this
- // node. If this is the case, we should descend the last group that had children,
- // and their position is already in lastCandidateGroup.
- found = (1 >= charGroupCount);
+ // Even if we don't have children here, we could still be on the last PtNode of /
+ // this array. If this is the case, we should descend the last PtNode that had
+ // children, and their position is already in lastCandidatePtNodePos.
+ found = (1 >= ptNodeCount);
}
if (found) {
- // Okay, we found the group we should descend. Its position is in
- // the lastCandidateGroupPos variable, so we just re-read it.
- if (0 != lastCandidateGroupPos) {
+ // Okay, we found the PtNode we should descend. Its position is in
+ // the lastCandidatePtNodePos variable, so we just re-read it.
+ if (0 != lastCandidatePtNodePos) {
const PatriciaTrieReadingUtils::NodeFlags lastFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
- mDictRoot, &lastCandidateGroupPos);
+ mDictRoot, &lastCandidatePtNodePos);
const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mDictRoot, &lastCandidateGroupPos);
- // We copy all the characters in this group to the buffer
+ mDictRoot, &lastCandidatePtNodePos);
+ // We copy all the characters in this PtNode to the buffer
outCodePoints[wordPos] = lastChar;
if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mDictRoot, &lastCandidateGroupPos);
+ mDictRoot, &lastCandidatePtNodePos);
int charCount = maxCodePointCount;
while (-1 != nextChar && --charCount > 0) {
outCodePoints[++wordPos] = nextChar;
nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
- mDictRoot, &lastCandidateGroupPos);
+ mDictRoot, &lastCandidatePtNodePos);
}
}
++wordPos;
@@ -168,19 +168,19 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// it's there, read pos, and break to resume the search at pos.
if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
- &lastCandidateGroupPos);
+ &lastCandidatePtNodePos);
}
pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- mDictRoot, lastFlags, &lastCandidateGroupPos);
+ mDictRoot, lastFlags, &lastCandidatePtNodePos);
break;
} else {
// Here is a little tricky part: we come here if we found out that all children
- // addresses in this group are bigger than the address we are searching for.
+ // addresses in this PtNode are bigger than the address we are searching for.
// Should we conclude the word is not in the dictionary? No! It could still be
- // one of the remaining chargroups in this node, so we have to keep looking in
- // this node until we find it (or we realize it's not there either, in which
- // case it's actually not in the dictionary). Pass the end of this group, ready
- // to start the next one.
+ // one of the remaining PtNodes in this array, so we have to keep looking in
+ // this array until we find it (or we realize it's not there either, in which
+ // case it's actually not in the dictionary). Pass the end of this PtNode,
+ // ready to start the next one.
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
mDictRoot, flags, &pos);
@@ -195,9 +195,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
} else {
// If we did not find it, we should record the last children address for the next
// iteration.
- if (hasChildren) lastCandidateGroupPos = startPos;
- // Now skip the end of this group (children pos and the attributes if any) so that
- // our pos is after the end of this char group, at the start of the next one.
+ if (hasChildren) lastCandidatePtNodePos = startPos;
+ // Now skip the end of this PtNode (children pos and the attributes if any) so that
+ // our pos is after the end of this PtNode, at the start of the next one.
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
mDictRoot, flags, &pos);
@@ -212,7 +212,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
}
}
- // If we have looked through all the chargroups and found no match, the nodePos is
+ // If we have looked through all the PtNodes and found no match, the nodePos is
// not the position of a terminal in this dictionary.
return 0;
}
@@ -228,24 +228,24 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
if (wordPos >= length) return NOT_A_VALID_WORD_POS;
- int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(mDictRoot,
+ int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
&pos);
const int wChar = forceLowerCaseSearch
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) {
- // If there are no more character groups in this node, it means we could not
+ // If there are no more PtNodes in this array, it means we could not
// find a matching character for this depth, therefore there is no match.
- if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS;
- const int charGroupPos = pos;
+ if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS;
+ const int ptNodePos = pos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
&pos);
if (character == wChar) {
- // This is the correct node. Only one character group may start with the same
- // char within a node, so either we found our match in this node, or there is
+ // This is the correct PtNode. Only one PtNode may start with the same char within
+ // a PtNode array, so either we found our match in this array, or there is
// no match and we can return NOT_A_VALID_WORD_POS. So we will check all the
- // characters in this character group indeed does match.
+ // characters in this PtNode indeed does match.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
&pos);
@@ -253,7 +253,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
++wordPos;
// If we shoot the length of the word we search for, or if we find a single
// character that does not match, as explained above, it means the word is
- // not in the dictionary (by virtue of this chargroup being the only one to
+ // not in the dictionary (by virtue of this PtNode being the only one to
// match the word on the first character, but not matching the whole word).
if (wordPos >= length) return NOT_A_VALID_WORD_POS;
if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS;
@@ -268,7 +268,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
++wordPos;
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
if (wordPos == length) {
- return charGroupPos;
+ return ptNodePos;
}
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
@@ -282,7 +282,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
flags, &pos);
break;
} else {
- // This chargroup does not match, so skip the remaining part and go to the next.
+ // This PtNode does not match, so skip the remaining part and go to the next.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
&pos);
@@ -301,7 +301,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
mBigramListPolicy.skipAllBigrams(&pos);
}
}
- --charGroupCount;
+ --ptNodeCount;
}
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 0d85050f3..d0567fd85 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -24,7 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
-#include "suggest/policyimpl/dictionary/utils/mmaped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,7 +33,7 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
- PatriciaTriePolicy(const MmapedBuffer *const buffer)
+ PatriciaTriePolicy(const MmappedBuffer *const buffer)
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
@@ -97,7 +97,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
- const MmapedBuffer *const mBuffer;
+ const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
const uint8_t *const mDictRoot;
const BigramListPolicy mBigramListPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
index 003b9435b..576a158bc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -23,15 +23,15 @@ namespace latinime {
typedef PatriciaTrieReadingUtils PtReadingUtils;
-const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
-const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80;
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0;
// Flag for single/multiple char group
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
-// Flag for terminal groups
+// Flag for terminal PtNodes
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
// Flag for shortcut targets presence
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
@@ -46,14 +46,14 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
const int base = *pos;
int offset = 0;
- switch (MASK_GROUP_ADDRESS_TYPE & flags) {
- case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
+ switch (MASK_CHILDREN_POSITION_TYPE & flags) {
+ case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE:
offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
break;
- case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
+ case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES:
offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
break;
- case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
+ case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES:
offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
break;
default:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
index 9f2fc2059..f76c38751 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -28,7 +28,7 @@ class PatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
- static AK_FORCE_INLINE int getGroupCountAndAdvancePosition(
+ static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
if (firstByte < 0x80) {
@@ -116,17 +116,17 @@ class PatriciaTrieReadingUtils {
}
static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
- return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags);
+ return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
- static const NodeFlags MASK_GROUP_ADDRESS_TYPE;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
- static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
+ static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
+ static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
static const NodeFlags FLAG_IS_TERMINAL;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
new file mode 100644
index 000000000..fdaf18f7c
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
+
+namespace latinime {
+
+/*
+ * This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
+ */
+class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+ DynamicShortcutListPolicy(const uint8_t *const shortcutBuf, const int bufSize,
+ const ExtendableBuffer *const additionalBuffer)
+ : mShortcutsBuf(shortcutBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {}
+
+ ~DynamicShortcutListPolicy() {}
+
+ int getStartPos(const int pos) const {
+ if (pos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
+ }
+
+ void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+ int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+ int *const pos) const {
+ const bool usesAdditionalBuffer = *pos >= mBufSize;
+ const uint8_t *const buffer = usesAdditionalBuffer
+ ? mAdditionalBuffer->getBuffer() : mShortcutsBuf;
+ if (usesAdditionalBuffer) {
+ *pos -= mBufSize;
+ }
+ const ShortcutListReadingUtils::ShortcutFlags flags =
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
+ if (outHasNext) {
+ *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+ }
+ if (outIsWhitelist) {
+ *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+ }
+ if (outCodePoint) {
+ *outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
+ buffer, maxCodePointCount, outCodePoint, pos);
+ }
+ if (usesAdditionalBuffer) {
+ *pos += mBufSize;
+ }
+ }
+
+ void skipAllShortcuts(int *const pos) const {
+ if (*pos >= mBufSize) {
+ *pos -= mBufSize;
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(mAdditionalBuffer->getBuffer(), pos);
+ *pos += mBufSize + shortcutListSize;
+ } else {
+ const int shortcutListSize = ShortcutListReadingUtils
+ ::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos);
+ *pos += shortcutListSize;
+ }
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
+
+ const uint8_t *const mShortcutsBuf;
+ const int mBufSize;
+ const ExtendableBuffer *const mAdditionalBuffer;
+};
+} // namespace latinime
+#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
index b5bb964d5..5f4f240f5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
@@ -50,6 +50,10 @@ class ShortcutListReadingUtils {
- SHORTCUT_LIST_SIZE_FIELD_SIZE;
}
+ static AK_FORCE_INLINE int getShortcutListSizeFieldSize() {
+ return SHORTCUT_LIST_SIZE_FIELD_SIZE;
+ }
+
static AK_FORCE_INLINE void skipShortcuts(const uint8_t *const dictRoot, int *const pos) {
const int shortcutListSize = getShortcutListSizeAndForwardPointer(dictRoot, pos);
*pos += shortcutListSize;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp
new file mode 100644
index 000000000..e55cc2410
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
+
+namespace latinime {
+
+const size_t ExtendableBuffer::INITIAL_BUFFER_SIZE = 16 * 1024;
+const size_t ExtendableBuffer::MAX_BUFFER_SIZE = 1024 * 1024;
+const size_t ExtendableBuffer::EXTEND_BUFFER_SIZE_STEP = 16 * 1024;
+
+}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h
new file mode 100644
index 000000000..5c75027d2
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/extendable_buffer.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_EXTENDABLE_BUFFER_H
+#define LATINIME_EXTENDABLE_BUFFER_H
+
+#include <cstddef>
+#include <stdint.h>
+#include <vector>
+
+#include "defines.h"
+
+namespace latinime {
+
+// This is used as a buffer that can be extended for updatable dictionaries.
+class ExtendableBuffer {
+ public:
+ ExtendableBuffer() : mBuffer(INITIAL_BUFFER_SIZE), mUsedSize(0) {}
+
+ AK_FORCE_INLINE const uint8_t *getBuffer() const {
+ return &mBuffer[0];
+ }
+
+ // Return if the buffer is successfully extended or not.
+ AK_FORCE_INLINE bool extendBuffer() {
+ if (mBuffer.size() + EXTEND_BUFFER_SIZE_STEP > MAX_BUFFER_SIZE) {
+ return false;
+ }
+ mBuffer.resize(mBuffer.size() + EXTEND_BUFFER_SIZE_STEP);
+ return true;
+ }
+
+ AK_FORCE_INLINE int getAllocatedSize() const {
+ return mBuffer.size();
+ }
+
+ AK_FORCE_INLINE int getUsedSize() const {
+ return mUsedSize;
+ }
+
+ AK_FORCE_INLINE void clear() {
+ mUsedSize = 0;
+ mBuffer.clear();
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ExtendableBuffer);
+
+ static const size_t INITIAL_BUFFER_SIZE;
+ static const size_t MAX_BUFFER_SIZE;
+ static const size_t EXTEND_BUFFER_SIZE_STEP;
+
+ std::vector<uint8_t> mBuffer;
+ int mUsedSize;
+};
+}
+#endif /* LATINIME_MMAPED_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmaped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 08add03c6..6febd7832 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmaped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef LATINIME_MMAPED_BUFFER_H
-#define LATINIME_MMAPED_BUFFER_H
+#ifndef LATINIME_MMAPPED_BUFFER_H
+#define LATINIME_MMAPPED_BUFFER_H
#include <cerrno>
#include <fcntl.h>
@@ -27,39 +27,40 @@
namespace latinime {
-class MmapedBuffer {
+class MmappedBuffer {
public:
- static MmapedBuffer* openBuffer(const char *const path, const int pathLength,
- const int bufOffset, const int size, const bool isUpdatable) {
+ static MmappedBuffer* openBuffer(const char *const path, const int pathLength,
+ const int bufferOffset, const int bufferSize, const bool isUpdatable) {
const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
- const int fd = open(path, openMode);
- if (fd < 0) {
+ const int mmapFd = open(path, openMode);
+ if (mmapFd < 0) {
AKLOGE("DICT: Can't open the source. path=%s errno=%d", path, errno);
return 0;
}
const int pagesize = getpagesize();
- const int offset = bufOffset % pagesize;
- int adjOffset = bufOffset - offset;
- int adjSize = size + offset;
+ const int offset = bufferOffset % pagesize;
+ int alignedOffset = bufferOffset - offset;
+ int alignedSize = bufferSize + offset;
const int protMode = isUpdatable ? PROT_READ | PROT_WRITE : PROT_READ;
- void *const mmapedBuffer = mmap(0, adjSize, protMode, MAP_PRIVATE, fd, adjOffset);
- if (mmapedBuffer == MAP_FAILED) {
+ void *const mmappedBuffer = mmap(0, alignedSize, protMode, MAP_PRIVATE, mmapFd,
+ alignedOffset);
+ if (mmappedBuffer == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
- close(fd);
+ close(mmapFd);
return 0;
}
- uint8_t *const buffer = static_cast<uint8_t *>(mmapedBuffer) + offset;
+ uint8_t *const buffer = static_cast<uint8_t *>(mmappedBuffer) + offset;
if (!buffer) {
AKLOGE("DICT: buffer is null");
- close(fd);
+ close(mmapFd);
return 0;
}
- return new MmapedBuffer(buffer, adjSize, fd, offset, isUpdatable);
+ return new MmappedBuffer(buffer, bufferSize, mmappedBuffer, alignedSize, mmapFd,
+ isUpdatable);
}
- ~MmapedBuffer() {
- int ret = munmap(static_cast<void *>(mBuffer - mBufferOffset),
- mBufferSize + mBufferOffset);
+ ~MmappedBuffer() {
+ int ret = munmap(mMmappedBuffer, mAlignedSize);
if (ret != 0) {
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
}
@@ -82,18 +83,20 @@ class MmapedBuffer {
}
private:
- AK_FORCE_INLINE MmapedBuffer(uint8_t *const buffer, const int bufferSize, const int mmapFd,
- const int bufferOffset, const bool isUpdatable)
- : mBuffer(buffer), mBufferSize(bufferSize), mMmapFd(mmapFd),
- mBufferOffset(bufferOffset), mIsUpdatable(isUpdatable) {}
+ AK_FORCE_INLINE MmappedBuffer(uint8_t *const buffer, const int bufferSize,
+ void *const mmappedBuffer, const int alignedSize, const int mmapFd,
+ const bool isUpdatable)
+ : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
+ mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
- DISALLOW_IMPLICIT_CONSTRUCTORS(MmapedBuffer);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;
const int mBufferSize;
+ void *const mMmappedBuffer;
+ const int mAlignedSize;
const int mMmapFd;
- const int mBufferOffset;
const bool mIsUpdatable;
};
}
-#endif /* LATINIME_MMAPED_BUFFER_H */
+#endif /* LATINIME_MMAPPED_BUFFER_H */
diff --git a/native/jni/src/utils/autocorrection_threshold_utils.cpp b/native/jni/src/utils/autocorrection_threshold_utils.cpp
index 3406e0f8e..1f8ee0814 100644
--- a/native/jni/src/utils/autocorrection_threshold_utils.cpp
+++ b/native/jni/src/utils/autocorrection_threshold_utils.cpp
@@ -83,9 +83,12 @@ const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2;
return 0.0f;
}
+ if (score <= 0 || distance >= afterLength) {
+ // normalizedScore must be 0.0f (the minimum value) if the score is less than or equal to 0,
+ // or if the edit distance is larger than or equal to afterLength.
+ return 0.0f;
+ }
// add a weight based on edit distance.
- // distance <= max(afterLength, beforeLength) == afterLength,
- // so, 0 <= distance / afterLength <= 1
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
// TODO: Revise the following logic thoroughly by referring to...