aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/defines.h8
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node.h23
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h61
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_node_utils.cpp2
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp5
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.h51
-rw-r--r--native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h33
-rw-r--r--native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp6
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp14
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h6
-rw-r--r--native/jni/src/suggest/core/dictionary/multi_bigram_map.h2
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.cpp5
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state.h14
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h6
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp14
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h23
-rw-r--r--native/jni/src/suggest/core/suggest.cpp36
-rw-r--r--native/jni/src/suggest/core/suggest.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h23
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp143
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h57
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp34
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h33
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp49
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp29
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp134
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp57
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h58
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h20
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h4
43 files changed, 714 insertions, 379 deletions
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 34a646f80..89dfa39b3 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -292,7 +292,6 @@ static inline void prof_out(void) {
// of the binary dictionary where a {key,value} string pair scheme is used.
#define LARGEST_INT_DIGIT_COUNT 11
-#define NOT_A_VALID_WORD_POS (-99)
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE (-1)
@@ -322,13 +321,6 @@ static inline void prof_out(void) {
#define MAX_POINTER_COUNT 1
#define MAX_POINTER_COUNT_G 2
-// Queue IDs and size for DicNodesCache
-#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE 0
-#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE 1
-#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL 2
-#define DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION 3
-#define DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE 4
-
template<typename T> AK_FORCE_INLINE const T &min(const T &a, const T &b) { return a < b ? a : b; }
template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { return a > b ? a : b; }
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index cdd9f59aa..41ef9d2b2 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -112,7 +112,7 @@ class DicNode {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
- NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@@ -125,7 +125,7 @@ class DicNode {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
- NOT_A_VALID_WORD_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+ NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@@ -143,7 +143,7 @@ class DicNode {
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
dicNode->mDicNodeProperties.getDepth(),
- dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevSpacePositions,
+ dicNode->mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
@@ -234,7 +234,7 @@ class DicNode {
}
bool isFirstWord() const {
- return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_VALID_WORD_POS;
+ return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@@ -321,8 +321,13 @@ class DicNode {
DUMP_WORD_AND_SCORE("OUTPUT");
}
- void outputSpacePositionsResult(int *spaceIndices) const {
- mDicNodeState.mDicNodeStatePrevWord.outputSpacePositions(spaceIndices);
+ int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
+ const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
+ if (inputIndex == NOT_AN_INDEX) {
+ return NOT_AN_INDEX;
+ } else {
+ return pInfoState->getInputIndexOfSampledPoint(inputIndex);
+ }
}
bool hasMultipleWords() const {
@@ -573,7 +578,11 @@ class DicNode {
}
}
- AK_FORCE_INLINE void updateInputIndexG(DicNode_InputStateG *inputStateG) {
+ AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
+ if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) {
+ mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex(
+ inputStateG->mInputIndex);
+ }
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
inputStateG->mInputIndex, inputStateG->mPrevCodePoint,
inputStateG->mTerminalDiffCost, inputStateG->mRawLength);
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h b/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h
index 2a486b804..7461f0cc6 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_priority_queue.h
@@ -24,20 +24,16 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_release_listener.h"
-// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ...
-#define MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY 310
-
namespace latinime {
class DicNodePriorityQueue : public DicNodeReleaseListener {
public:
- AK_FORCE_INLINE DicNodePriorityQueue()
- : MAX_CAPACITY(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY),
- mMaxSize(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), mDicNodesBuf(), mUnusedNodeIndices(),
- mNextUnusedNodeId(0), mDicNodesQueue() {
- mDicNodesBuf.resize(MAX_CAPACITY + 1);
- mUnusedNodeIndices.resize(MAX_CAPACITY + 1);
- reset();
+ AK_FORCE_INLINE explicit DicNodePriorityQueue(const int capacity)
+ : mCapacity(capacity), mMaxSize(capacity), mDicNodesBuf(),
+ mUnusedNodeIndices(), mNextUnusedNodeId(0), mDicNodesQueue() {
+ mDicNodesBuf.resize(mCapacity + 1);
+ mUnusedNodeIndices.resize(mCapacity + 1);
+ clearAndResizeToCapacity();
}
// Non virtual inline destructor -- never inherit this class
@@ -52,11 +48,12 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
}
AK_FORCE_INLINE void setMaxSize(const int maxSize) {
- mMaxSize = min(maxSize, MAX_CAPACITY);
+ ASSERT(maxSize <= mCapacity);
+ mMaxSize = min(maxSize, mCapacity);
}
- AK_FORCE_INLINE void reset() {
- clearAndResize(MAX_CAPACITY);
+ AK_FORCE_INLINE void clearAndResizeToCapacity() {
+ clearAndResize(mCapacity);
}
AK_FORCE_INLINE void clear() {
@@ -64,27 +61,19 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
}
AK_FORCE_INLINE void clearAndResize(const int maxSize) {
+ ASSERT(maxSize <= mCapacity);
while (!mDicNodesQueue.empty()) {
mDicNodesQueue.pop();
}
setMaxSize(maxSize);
- for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
+ for (int i = 0; i < mCapacity + 1; ++i) {
mDicNodesBuf[i].remove();
mDicNodesBuf[i].setReleaseListener(this);
- mUnusedNodeIndices[i] = i == MAX_CAPACITY ? NOT_A_NODE_ID : static_cast<int>(i) + 1;
+ mUnusedNodeIndices[i] = i == mCapacity ? NOT_A_NODE_ID : static_cast<int>(i) + 1;
}
mNextUnusedNodeId = 0;
}
- AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) {
- DicNode *newNode = searchEmptyDicNode();
- if (newNode) {
- DicNodeUtils::initByCopy(dicNode, newNode);
- return newNode;
- }
- return 0;
- }
-
// Copy
AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode) {
return copyPush(dicNode, mMaxSize);
@@ -111,12 +100,12 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
}
mUnusedNodeIndices[index] = mNextUnusedNodeId;
mNextUnusedNodeId = index;
- ASSERT(index >= 0 && index < (MAX_CAPACITY + 1));
+ ASSERT(index >= 0 && index < (mCapacity + 1));
}
AK_FORCE_INLINE void dump() const {
AKLOGI("\n\n\n\n\n===========================");
- for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
+ for (int i = 0; i < mCapacity + 1; ++i) {
if (mDicNodesBuf[i].isUsed()) {
mDicNodesBuf[i].dump("QUEUE: ");
}
@@ -125,7 +114,7 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
}
private:
- DISALLOW_COPY_AND_ASSIGN(DicNodePriorityQueue);
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodePriorityQueue);
static const int NOT_A_NODE_ID = -1;
AK_FORCE_INLINE static bool compareDicNode(DicNode *left, DicNode *right) {
@@ -139,7 +128,7 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
};
typedef std::priority_queue<DicNode *, std::vector<DicNode *>, DicNodeComparator> DicNodesQueue;
- const int MAX_CAPACITY;
+ const int mCapacity;
int mMaxSize;
std::vector<DicNode> mDicNodesBuf; // of each element of mDicNodesBuf respectively
std::vector<int> mUnusedNodeIndices;
@@ -163,13 +152,12 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
}
AK_FORCE_INLINE DicNode *searchEmptyDicNode() {
- // TODO: Currently O(n) but should be improved to O(1)
- if (MAX_CAPACITY == 0) {
+ if (mCapacity == 0) {
return 0;
}
if (mNextUnusedNodeId == NOT_A_NODE_ID) {
AKLOGI("No unused node found.");
- for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
+ for (int i = 0; i < mCapacity + 1; ++i) {
AKLOGI("Dump node availability, %d, %d, %d",
i, mDicNodesBuf[i].isUsed(), mUnusedNodeIndices[i]);
}
@@ -185,7 +173,7 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
mNextUnusedNodeId = mUnusedNodeIndices[index];
mUnusedNodeIndices[index] = NOT_A_NODE_ID;
- ASSERT(index >= 0 && index < (MAX_CAPACITY + 1));
+ ASSERT(index >= 0 && index < (mCapacity + 1));
}
AK_FORCE_INLINE DicNode *pushPoolNodeWithMaxSize(DicNode *dicNode, const int maxSize) {
@@ -209,6 +197,15 @@ class DicNodePriorityQueue : public DicNodeReleaseListener {
AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode, const int maxSize) {
return pushPoolNodeWithMaxSize(newDicNode(dicNode), maxSize);
}
+
+ AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) {
+ DicNode *newNode = searchEmptyDicNode();
+ if (newNode) {
+ DicNodeUtils::initByCopy(dicNode, newNode);
+ }
+ return newNode;
+ }
+
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PRIORITY_QUEUE_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
index e81591992..ec65114c7 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp
@@ -89,7 +89,7 @@ namespace latinime {
const int unigramProbability = node->getProbability();
const int wordPos = node->getPos();
const int prevWordPos = node->getPrevWordPos();
- if (NOT_A_VALID_WORD_POS == wordPos || NOT_A_VALID_WORD_POS == prevWordPos) {
+ if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
index c3d2a2e74..b6be47e90 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
@@ -23,6 +23,11 @@
namespace latinime {
+// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ...
+const int DicNodesCache::LARGE_PRIORITY_QUEUE_CAPACITY = 310;
+// Capacity for reducing memory footprint.
+const int DicNodesCache::SMALL_PRIORITY_QUEUE_CAPACITY = 100;
+
/**
* Truncates all of the dicNodes so that they start at the given commit point.
* Only called for multi-word typing input.
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
index 7aab0906e..8493b6a8b 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
@@ -31,25 +31,32 @@ class DicNode;
*/
class DicNodesCache {
public:
- AK_FORCE_INLINE DicNodesCache()
- : mActiveDicNodes(&mDicNodePriorityQueues[DIC_NODES_CACHE_INITIAL_QUEUE_ID_ACTIVE]),
- mNextActiveDicNodes(&mDicNodePriorityQueues[
- DIC_NODES_CACHE_INITIAL_QUEUE_ID_NEXT_ACTIVE]),
- mTerminalDicNodes(&mDicNodePriorityQueues[DIC_NODES_CACHE_INITIAL_QUEUE_ID_TERMINAL]),
- mCachedDicNodesForContinuousSuggestion(&mDicNodePriorityQueues[
- DIC_NODES_CACHE_INITIAL_QUEUE_ID_CACHE_FOR_CONTINUOUS_SUGGESTION]),
- mInputIndex(0), mLastCachedInputIndex(0) {
- }
+ AK_FORCE_INLINE explicit DicNodesCache(const bool usesLargeCapacityCache)
+ : mUsesLargeCapacityCache(usesLargeCapacityCache),
+ mDicNodePriorityQueue0(getCacheCapacity()),
+ mDicNodePriorityQueue1(getCacheCapacity()),
+ mDicNodePriorityQueue2(getCacheCapacity()),
+ mDicNodePriorityQueueForTerminal(MAX_RESULTS),
+ mActiveDicNodes(&mDicNodePriorityQueue0),
+ mNextActiveDicNodes(&mDicNodePriorityQueue1),
+ mCachedDicNodesForContinuousSuggestion(&mDicNodePriorityQueue2),
+ mTerminalDicNodes(&mDicNodePriorityQueueForTerminal),
+ mInputIndex(0), mLastCachedInputIndex(0) {}
AK_FORCE_INLINE virtual ~DicNodesCache() {}
AK_FORCE_INLINE void reset(const int nextActiveSize, const int terminalSize) {
mInputIndex = 0;
mLastCachedInputIndex = 0;
- mActiveDicNodes->reset();
- mNextActiveDicNodes->clearAndResize(nextActiveSize);
+ // We want to use the max capacity for the current active dic node queue.
+ mActiveDicNodes->clearAndResizeToCapacity();
+ // nextActiveSize is used to limit the next iteration's active dic node size.
+ const int nextActiveSizeFittingToTheCapacity = min(nextActiveSize, getCacheCapacity());
+ mNextActiveDicNodes->clearAndResize(nextActiveSizeFittingToTheCapacity);
mTerminalDicNodes->clearAndResize(terminalSize);
- mCachedDicNodesForContinuousSuggestion->reset();
+ // We want to use the max capacity for the cached dic nodes that will be used for the
+ // continuous suggestion.
+ mCachedDicNodesForContinuousSuggestion->clearAndResizeToCapacity();
}
AK_FORCE_INLINE void continueSearch() {
@@ -157,21 +164,35 @@ class DicNodesCache {
return tmp;
}
+ AK_FORCE_INLINE int getCacheCapacity() const {
+ return mUsesLargeCapacityCache ?
+ LARGE_PRIORITY_QUEUE_CAPACITY : SMALL_PRIORITY_QUEUE_CAPACITY;
+ }
+
AK_FORCE_INLINE void resetTemporaryCaches() {
mActiveDicNodes->clear();
mNextActiveDicNodes->clear();
mTerminalDicNodes->clear();
}
- DicNodePriorityQueue mDicNodePriorityQueues[DIC_NODES_CACHE_PRIORITY_QUEUES_SIZE];
+ static const int LARGE_PRIORITY_QUEUE_CAPACITY;
+ static const int SMALL_PRIORITY_QUEUE_CAPACITY;
+
+ const bool mUsesLargeCapacityCache;
+ // Instances
+ DicNodePriorityQueue mDicNodePriorityQueue0;
+ DicNodePriorityQueue mDicNodePriorityQueue1;
+ DicNodePriorityQueue mDicNodePriorityQueue2;
+ DicNodePriorityQueue mDicNodePriorityQueueForTerminal;
+
// Active dicNodes currently being expanded.
DicNodePriorityQueue *mActiveDicNodes;
// Next dicNodes to be expanded.
DicNodePriorityQueue *mNextActiveDicNodes;
- // Current top terminal dicNodes.
- DicNodePriorityQueue *mTerminalDicNodes;
// Cached dicNodes used for continuous suggestion.
DicNodePriorityQueue *mCachedDicNodesForContinuousSuggestion;
+ // Current top terminal dicNodes.
+ DicNodePriorityQueue *mTerminalDicNodes;
int mInputIndex;
int mLastCachedInputIndex;
};
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
index 9bc96877e..b8986203d 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_utils.h"
+#include "suggest/core/layout/proximity_info_state.h"
namespace latinime {
@@ -29,9 +30,8 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
- mPrevWordNodePos(NOT_A_VALID_WORD_POS) {
+ mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
- memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
}
virtual ~DicNodeStatePrevWord() {}
@@ -41,8 +41,8 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
- mPrevWordNodePos = NOT_A_VALID_WORD_POS;
- memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
+ mPrevWordNodePos = NOT_A_DICT_POS;
+ mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
void init(const int prevWordNodePos) {
@@ -51,7 +51,7 @@ class DicNodeStatePrevWord {
mPrevWordStart = 0;
mPrevWordProbability = -1;
mPrevWordNodePos = prevWordNodePos;
- memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
+ mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
// Init by copy
@@ -61,14 +61,14 @@ class DicNodeStatePrevWord {
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
mPrevWordNodePos = prevWord->mPrevWordNodePos;
+ mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
- memcpy(mPrevSpacePositions, prevWord->mPrevSpacePositions, sizeof(mPrevSpacePositions));
}
void init(const int16_t prevWordCount, const int16_t prevWordProbability,
const int prevWordNodePos, const int *const src0, const int16_t length0,
- const int *const src1, const int16_t length1, const int *const prevSpacePositions,
- const int lastInputIndex) {
+ const int *const src1, const int16_t length1,
+ const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
mPrevWordNodePos = prevWordNodePos;
@@ -80,8 +80,7 @@ class DicNodeStatePrevWord {
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
mPrevWordStart = length0;
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
- memcpy(mPrevSpacePositions, prevSpacePositions, sizeof(mPrevSpacePositions));
- mPrevSpacePositions[mPrevWordCount - 1] = lastInputIndex;
+ mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex;
}
void truncate(const int offset) {
@@ -96,11 +95,12 @@ class DicNodeStatePrevWord {
mPrevWordLength = newPrevWordLength;
}
- void outputSpacePositions(int *spaceIndices) const {
- // Convert uint16_t to int
- for (int i = 0; i < MAX_RESULTS; i++) {
- spaceIndices[i] = mPrevSpacePositions[i];
- }
+ void setSecondWordFirstInputIndex(const int inputIndex) {
+ mSecondWordFirstInputIndex = inputIndex;
+ }
+
+ int getSecondWordFirstInputIndex() const {
+ return mSecondWordFirstInputIndex;
}
// TODO: remove
@@ -138,8 +138,6 @@ class DicNodeStatePrevWord {
// TODO: Move to private
int mPrevWord[MAX_WORD_LENGTH];
- // TODO: Move to private
- int mPrevSpacePositions[MAX_RESULTS];
private:
// Caution!!!
@@ -150,6 +148,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
int mPrevWordNodePos;
+ int mSecondWordFirstInputIndex;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index cf1cd8815..425b07624 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -116,7 +116,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
continue;
}
const int codePointCount = mDictionaryStructurePolicy->
@@ -146,7 +146,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
if (0 >= prevWordLength) return NOT_A_DICT_POS;
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
- if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
+ if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
}
@@ -157,7 +157,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
if (NOT_A_DICT_POS == pos) return false;
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
- if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
+ if (NOT_A_DICT_POS == nextWordPos) return false;
BinaryDictionaryBigramsIterator bigramsIt(
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 02ece639c..033572201 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -87,7 +87,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
int Dictionary::getProbability(const int *word, int length) const {
int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
- if (NOT_A_VALID_WORD_POS == pos) {
+ if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;
}
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
@@ -112,6 +112,18 @@ void Dictionary::removeBigramWords(const int *const word0, const int length0,
mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
}
+void Dictionary::flush(const char *const filePath) {
+ mDictionaryStructureWithBufferPolicy->flush(filePath);
+}
+
+void Dictionary::flushWithGC(const char *const filePath) {
+ mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
+}
+
+bool Dictionary::needsToRunGC() {
+ return mDictionaryStructureWithBufferPolicy->needsToRunGC();
+}
+
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
const int BUFFER_SIZE = 16;
int dictionaryIdCodePointBuffer[BUFFER_SIZE];
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 0afe5a54b..06e84bbfe 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -77,6 +77,12 @@ class Dictionary {
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC();
+
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy;
}
diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
index 9efe5f6f9..aecf41386 100644
--- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
+++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h
@@ -73,7 +73,7 @@ class MultiBigramMap {
bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
- if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
+ if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
continue;
}
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
index 7780efdfd..fbabd92f2 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp
@@ -36,8 +36,8 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
const int *const pointerIds, const bool isGeometric) {
ASSERT(isGeometric || (inputSize < MAX_WORD_LENGTH));
- mIsContinuousSuggestionPossible =
- ProximityInfoStateUtils::checkAndReturnIsContinuousSuggestionPossible(
+ mIsContinuousSuggestionPossible = (mHasBeenUpdatedByGeometricInput != isGeometric) ?
+ false : ProximityInfoStateUtils::checkAndReturnIsContinuousSuggestionPossible(
inputSize, xCoordinates, yCoordinates, times, mSampledInputSize,
&mSampledInputXs, &mSampledInputYs, &mSampledTimes, &mSampledInputIndice);
if (DEBUG_DICT) {
@@ -155,6 +155,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
if (DEBUG_GEO_FULL) {
AKLOGI("ProximityState init finished: %d points out of %d", mSampledInputSize, inputSize);
}
+ mHasBeenUpdatedByGeometricInput = isGeometric;
}
// This function basically converts from a length to an edit distance. Accordingly, it's obviously
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h
index dbcd54488..c94060fa9 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state.h
@@ -46,10 +46,11 @@ class ProximityInfoState {
: mProximityInfo(0), mMaxPointToKeyLength(0.0f), mAverageSpeed(0.0f),
mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0),
mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0),
- mIsContinuousSuggestionPossible(false), mSampledInputXs(), mSampledInputYs(),
- mSampledTimes(), mSampledInputIndice(), mSampledLengthCache(),
- mBeelineSpeedPercentiles(), mSampledNormalizedSquaredLengthCache(), mSpeedRates(),
- mDirections(), mCharProbabilities(), mSampledNearKeySets(), mSampledSearchKeySets(),
+ mIsContinuousSuggestionPossible(false), mHasBeenUpdatedByGeometricInput(false),
+ mSampledInputXs(), mSampledInputYs(), mSampledTimes(), mSampledInputIndice(),
+ mSampledLengthCache(), mBeelineSpeedPercentiles(),
+ mSampledNormalizedSquaredLengthCache(), mSpeedRates(), mDirections(),
+ mCharProbabilities(), mSampledNearKeySets(), mSampledSearchKeySets(),
mSampledSearchKeyVectors(), mTouchPositionCorrectionEnabled(false),
mSampledInputSize(0), mMostProbableStringProbability(0.0f) {
memset(mInputProximities, 0, sizeof(mInputProximities));
@@ -129,6 +130,10 @@ class ProximityInfoState {
return mSampledInputYs[index];
}
+ int getInputIndexOfSampledPoint(const int sampledIndex) const {
+ return mSampledInputIndice[sampledIndex];
+ }
+
bool hasSpaceProximity(const int index) const;
int getLengthCache(const int index) const {
@@ -204,6 +209,7 @@ class ProximityInfoState {
int mGridHeight;
int mGridWidth;
bool mIsContinuousSuggestionPossible;
+ bool mHasBeenUpdatedByGeometricInput;
std::vector<int> mSampledInputXs;
std::vector<int> mSampledInputYs;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index c8cbbcfdf..24d1b8ba1 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -74,6 +74,12 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) = 0;
+ virtual void flush(const char *const filePath) = 0;
+
+ virtual void flushWithGC(const char *const filePath) = 0;
+
+ virtual bool needsToRunGC() const = 0;
+
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index 0ca583f90..50f2bbd8d 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -23,6 +23,11 @@
namespace latinime {
+// 256K bytes threshold is heuristically used to distinguish dictionaries containing many unigrams
+// (e.g. main dictionary) from small dictionaries (e.g. contacts...)
+const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
+ 256 * 1024;
+
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
int prevWordLength, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
@@ -30,13 +35,13 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
- mPrevWordPos = NOT_A_VALID_WORD_POS;
+ mPrevWordPos = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
- if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
+ if (mPrevWordPos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
@@ -59,8 +64,9 @@ const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStru
return mDictionary->getDictionaryStructurePolicy();
}
-void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) {
- mDicNodesCache.reset(nextActiveCacheSize, maxWords);
+void DicTraverseSession::resetCache(const int thresholdForNextActiveDicNodes, const int maxWords) {
+ mDicNodesCache.reset(thresholdForNextActiveDicNodes /* nextActiveSize */,
+ maxWords /* terminalSize */);
mMultiBigramMap.clear();
mPartiallyCommited = false;
}
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index 23de5cc65..e0b1c67d9 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -37,8 +37,12 @@ class DicTraverseSession {
public:
// A factory method for DicTraverseSession
- static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr) {
- return new DicTraverseSession(env, localeStr);
+ static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr,
+ jlong dictSize) {
+ // To deal with the trade-off between accuracy and memory space, large cache is used for
+ // dictionaries larger that the threshold
+ return new DicTraverseSession(env, localeStr,
+ dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION);
}
static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession,
@@ -54,10 +58,10 @@ class DicTraverseSession {
delete traverseSession;
}
- AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr)
- : mPrevWordPos(NOT_A_VALID_WORD_POS), mProximityInfo(0),
- mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(),
- mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
+ AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
+ : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
+ mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
+ mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
// NOTE: mProximityInfoStates is an array of instances.
// No need to initialize it explicitly here.
@@ -73,7 +77,7 @@ class DicTraverseSession {
const int inputSize, const int *const inputXs, const int *const inputYs,
const int *const times, const int *const pointerIds, const float maxSpatialDistance,
const int maxPointerCount);
- void resetCache(const int nextActiveCacheSize, const int maxWords);
+ void resetCache(const int thresholdForNextActiveDicNodes, const int maxWords);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const;
@@ -109,7 +113,9 @@ class DicTraverseSession {
if (usedPointerCount != 1) {
return false;
}
- *pointerId = usedPointerId;
+ if (pointerId) {
+ *pointerId = usedPointerId;
+ }
return true;
}
@@ -181,6 +187,7 @@ class DicTraverseSession {
DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseSession);
// threshold to start caching
static const int CACHE_START_INPUT_LENGTH_THRESHOLD;
+ static const int DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION;
void initializeProximityInfoStates(const int *const inputCodePoints, const int *const inputXs,
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index e788e914a..0c925be25 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -117,7 +117,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
* Outputs the final list of suggestions (i.e., terminal nodes).
*/
int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *spaceIndices, int *outputTypes) const {
+ int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const {
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
const int terminalSize = 0;
#else
@@ -139,6 +139,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
&outputCodePoints[0], &outputTypes[0], &frequencies[0]);
if (hasMostProbableString) {
+ outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
++outputWordIndex;
}
@@ -160,6 +161,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|| (traverseSession->getInputSize()
>= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
&& terminals[0].hasMultipleWords())) : false;
+ // TODO: have partial commit work even with multiple pointers.
+ const bool outputSecondWordFirstLetterInputIndex =
+ traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
++terminalIndex) {
@@ -194,18 +198,21 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
terminalDicNode->isExactMatch()
|| (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|| (isValidWord && SCORING->doesAutoCorrectValidWord()));
- maxScore = max(maxScore, finalScore);
-
- // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
- // Index for top typing suggestion should be 0.
- if (isValidWord && outputWordIndex == 0) {
- terminalDicNode->outputSpacePositionsResult(spaceIndices);
+ if (maxScore < finalScore && isValidWord) {
+ maxScore = finalScore;
}
// Don't output invalid words. However, we still need to submit their shortcuts if any.
if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore;
+ if (outputSecondWordFirstLetterInputIndex) {
+ outputIndicesToPartialCommit[outputWordIndex] =
+ terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0));
+ } else {
+ outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
+ }
// Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
@@ -220,8 +227,19 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
- outputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, outputWordIndex,
- finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped);
+ const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
+ outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes,
+ sameAsTyped);
+ const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
+ traverseSession->getProximityInfoState(0));
+ for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
+ if (outputSecondWordFirstLetterInputIndex) {
+ outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
+ } else {
+ outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
+ }
+ }
+ outputWordIndex = updatedOutputWordIndex;
}
DicNode::managedDelete(terminalDicNode);
}
diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h
index 875cbe4e0..b24019632 100644
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@@ -55,7 +55,7 @@ class Suggest : public SuggestInterface {
void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
const bool spaceSubstitution) const;
int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
- int *outputCodePoints, int *outputIndices, int *outputTypes) const;
+ int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes) const;
void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
index d57547445..09e832f07 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
@@ -38,6 +38,22 @@ const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+/* static */ BigramListReadWriteUtils::BigramFlags
+ BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
+}
+
+/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
+ int *const pos) {
+ BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
+ while (hasNext(flags)) {
+ *pos += attributeAddressSize(flags);
+ flags = getFlagsAndForwardPointer(bigramsBuf, pos);
+ }
+ *pos += attributeAddressSize(flags);
+}
+
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
int offset = 0;
@@ -54,7 +70,7 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
break;
}
if (offset == 0) {
- return NOT_A_VALID_WORD_POS;
+ return NOT_A_DICT_POS;
}
if (isOffsetNegative(flags)) {
return origin - offset;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
index ee2b722a4..884bcd7a9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
@@ -21,7 +21,6 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -29,10 +28,7 @@ class BigramListReadWriteUtils {
public:
typedef uint8_t BigramFlags;
- static AK_FORCE_INLINE BigramFlags getFlagsAndForwardPointer(
- const uint8_t *const bigramsBuf, int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
- }
+ static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
return flags & MASK_ATTRIBUTE_PROBABILITY;
@@ -43,15 +39,7 @@ public:
}
// Bigrams reading methods
- static AK_FORCE_INLINE void skipExistingBigrams(const uint8_t *const bigramsBuf,
- int *const pos) {
- BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
- while (hasNext(flags)) {
- *pos += attributeAddressSize(flags);
- flags = getFlagsAndForwardPointer(bigramsBuf, pos);
- }
- *pos += attributeAddressSize(flags);
- }
+ static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos);
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
@@ -74,12 +62,17 @@ public:
return flags | FLAG_ATTRIBUTE_HAS_NEXT;
}
+ static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
+ const int probability) {
+ return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
+ }
+
// Returns true if the bigram entry is valid and put entry values into out*.
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
const int entryPos, const int targetPos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags, uint32_t *const outOffset,
int *const outOffsetFieldSize) {
- if (targetPos == NOT_A_VALID_WORD_POS) {
+ if (targetPos == NOT_A_DICT_POS) {
return false;
}
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 936dc9c5d..4c44d22fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -18,25 +18,64 @@
namespace latinime {
-bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
+const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
+
+void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const BigramListReadWriteUtils::BigramFlags flags =
+ BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, pos);
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
+ int *outBigramsCount) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
}
+ *outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags flags;
do {
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
- int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, fromPos);
- if (bigramPos == NOT_A_VALID_WORD_POS) {
+ if (originalBigramPos == NOT_A_DICT_POS) {
// skip invalid bigram entry.
continue;
}
if (usesAdditionalBuffer) {
- bigramPos += mBuffer->getOriginalBufferSize();
+ originalBigramPos += mBuffer->getOriginalBufferSize();
}
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
BigramListReadWriteUtils::BigramFlags newBigramFlags;
uint32_t newBigramOffset;
int newBigramOffsetFieldSize;
@@ -54,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
toPos)) {
return false;
}
+ (*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(flags));
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();
@@ -61,8 +101,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
return true;
}
-bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int probability,
- int *const pos) {
+bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos,
+ const int probability, int *const pos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
@@ -76,7 +116,17 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
- BigramListReadWriteUtils::getBigramAddressAndForwardPointer(buffer, flags, pos);
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, pos);
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) {
+ // Update this bigram entry.
+ const BigramListReadWriteUtils::BigramFlags updatedFlags =
+ BigramListReadWriteUtils::setProbabilityInFlags(flags, probability);
+ return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos);
+ }
if (BigramListReadWriteUtils::hasNext(flags)) {
continue;
}
@@ -87,33 +137,35 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
return false;
}
- // Then, add a new entry after the last entry.
- BigramListReadWriteUtils::BigramFlags newBigramFlags;
- uint32_t newBigramOffset;
- int newBigramOffsetFieldSize;
- if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
- *pos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
- BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
- &newBigramOffsetFieldSize)) {
- continue;
- }
- int newEntryPos = *pos;
if (usesAdditionalBuffer) {
- newEntryPos += mBuffer->getOriginalBufferSize();
- }
- // Write bigram flags.
- if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,
- &newEntryPos)) {
- return false;
- }
- // Write bigram positon offset.
- if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
- &newEntryPos)) {
- return false;
+ *pos += mBuffer->getOriginalBufferSize();
}
+ // Then, add a new entry after the last entry.
+ return writeNewBigramEntry(bigramPos, probability, pos);
} while(BigramListReadWriteUtils::hasNext(flags));
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
+ // We return directly from the while loop.
+ ASSERT(false);
+ return false;
+}
+
+bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability,
+ int *const writingPos) {
+ BigramListReadWriteUtils::BigramFlags newBigramFlags;
+ uint32_t newBigramOffset;
+ int newBigramOffsetFieldSize;
+ if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
+ *writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
+ &newBigramOffset, &newBigramOffsetFieldSize)) {
+ return false;
+ }
+ // Write bigram flags.
+ if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
+ return false;
+ }
+ // Write bigram positon offset.
+ if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
+ writingPos)) {
+ return false;
}
return true;
}
@@ -133,11 +185,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
if (usesAdditionalBuffer) {
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
}
- int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, &pos);
- if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) {
- bigramPos += mBuffer->getOriginalBufferSize();
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
}
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
if (bigramPos != targetBigramPos) {
continue;
}
@@ -152,4 +205,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
return false;
}
+int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
+ const int originalBigramPos) const {
+ if (originalBigramPos == NOT_A_DICT_POS) {
+ return NOT_A_DICT_POS;
+ }
+ int currentPos = originalBigramPos;
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+ nodeReader.fetchNodeInfoFromBuffer(currentPos);
+ int bigramLinkCount = 0;
+ while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
+ currentPos = nodeReader.getBigramLinkedNodePos();
+ nodeReader.fetchNodeInfoFromBuffer(currentPos);
+ bigramLinkCount++;
+ if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
+ AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
+ ASSERT(false);
+ return NOT_A_DICT_POS;
+ }
+ }
+ return currentPos;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index b7c05376d..dafb62d80 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -21,7 +21,9 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@@ -31,49 +33,26 @@ namespace latinime {
*/
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer)
- : mBuffer(buffer) {}
+ DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+ : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
- int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- const BigramListReadWriteUtils::BigramFlags flags =
- BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
- *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- buffer, flags, pos);
- if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) {
- *outBigramPos += mBuffer->getOriginalBufferSize();
- }
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
- *outHasNext = BigramListReadWriteUtils::hasNext(flags);
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
-
- void skipAllBigrams(int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
+ int *const pos) const;
+
+ void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
- // positions after bigram lists. This method skips invalid bigram entries.
- bool copyAllBigrams(int *const fromPos, int *const toPos);
+ // positions after bigram lists. This method skips invalid bigram entries and write the valid
+ // bigram entry count to outBigramsCount.
+ bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
+
+ bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
- bool addBigramEntry(const int bigramPos, const int probability, int *const pos);
+ bool writeNewBigramEntry(const int bigramPos, const int probability,
+ int *const writingPos);
// Return if targetBigramPos is found or not.
bool removeBigram(const int bigramListPos, const int targetBigramPos);
@@ -81,7 +60,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
+ static const int BIGRAM_LINK_COUNT_LIMIT;
+
BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
+
+ // Follow bigram link and return the position of bigram target PtNode that is currently valid.
+ int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
index 434858d67..ff80dd2f6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp
@@ -27,12 +27,12 @@
namespace latinime {
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
- ::newDictionaryStructureWithBufferPolicy(const char *const path, const int pathLength,
- const int bufOffset, const int size, const bool isUpdatable) {
+ ::newDictionaryStructureWithBufferPolicy(const char *const path, const int bufOffset,
+ const int size, const bool isUpdatable) {
// Allocated buffer in MmapedBuffer::openBuffer() will be freed in the destructor of
// impl classes of DictionaryStructureWithBufferPolicy.
- const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, pathLength, bufOffset,
- size, isUpdatable);
+ const MmappedBuffer *const mmapedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
+ isUpdatable);
if (!mmapedBuffer) {
return 0;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
index 1cb7a89c4..8cebc3b16 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h
@@ -27,8 +27,7 @@ namespace latinime {
class DictionaryStructureWithBufferPolicyFactory {
public:
static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy(
- const char *const path, const int pathLength, const int bufOffset, const int size,
- const bool isUpdatable);
+ const char *const path, const int bufOffset, const int size, const bool isUpdatable);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index 5674cb48e..737098423 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -25,9 +25,17 @@ namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
+ if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) {
+ AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
+ nodePos, mBuffer->getTailPosition());
+ ASSERT(false);
+ invalidatePtNodeInfo();
+ return;
+ }
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = nodePos;
+ mHeadPos = nodePos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
@@ -57,10 +65,17 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
}
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, mFlags, &pos);
+ dictBuf, &pos);
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
mChildrenPos += mBuffer->getOriginalBufferSize();
}
+ if (mSiblingPos == NOT_A_DICT_POS) {
+ if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
+ mBigramLinkedNodePos = mChildrenPos;
+ } else {
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ }
+ }
if (usesAdditionalBuffer) {
pos += mBuffer->getOriginalBufferSize();
}
@@ -77,7 +92,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
mBigramPos = NOT_A_DICT_POS;
}
// Update siblingPos if needed.
- if (mSiblingPos == NOT_A_VALID_WORD_POS) {
+ if (mSiblingPos == NOT_A_DICT_POS) {
// Sibling position is the tail position of current node.
mSiblingPos = pos;
}
@@ -88,4 +103,19 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
}
}
+void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
+ mHeadPos = NOT_A_DICT_POS;
+ mFlags = 0;
+ mParentPos = NOT_A_DICT_POS;
+ mCodePointCount = 0;
+ mProbabilityFieldPos = NOT_A_DICT_POS;
+ mProbability = NOT_A_PROBABILITY;
+ mChildrenPosFieldPos = NOT_A_DICT_POS;
+ mChildrenPos = NOT_A_DICT_POS;
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ mShortcutPos = NOT_A_DICT_POS;
+ mBigramPos = NOT_A_DICT_POS;
+ mSiblingPos = NOT_A_DICT_POS;
+}
+
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
index 2ee7c2495..6ef5f5813 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -39,12 +39,12 @@ class DynamicPatriciaTrieNodeReader {
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
- mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
- mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
- mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
- mSiblingPos(NOT_A_VALID_WORD_POS) {}
+ mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mSiblingPos(NOT_A_DICT_POS) {}
~DynamicPatriciaTrieNodeReader() {}
@@ -56,13 +56,14 @@ class DynamicPatriciaTrieNodeReader {
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
- mNodePos = nodePos;
- mSiblingPos = NOT_A_VALID_WORD_POS;
- fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints);
+ mSiblingPos = NOT_A_DICT_POS;
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
}
- AK_FORCE_INLINE int getNodePos() const {
- return mNodePos;
+ // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
}
// Flags
@@ -114,6 +115,11 @@ class DynamicPatriciaTrieNodeReader {
return mChildrenPos;
}
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
// Shortcutlist position
AK_FORCE_INLINE int getShortcutPos() const {
return mShortcutPos;
@@ -135,7 +141,7 @@ class DynamicPatriciaTrieNodeReader {
const BufferWithExtendableBuffer *const mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- int mNodePos;
+ int mHeadPos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos;
uint8_t mCodePointCount;
@@ -143,12 +149,15 @@ class DynamicPatriciaTrieNodeReader {
int mProbability;
int mChildrenPosFieldPos;
int mChildrenPos;
+ int mBigramLinkedNodePos;
int mShortcutPos;
int mBigramPos;
int mSiblingPos;
void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount,
int *const outCodePoints);
+
+ void invalidatePtNodeInfo();
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 945677b50..3cfbfd85b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
- childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
+ childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
nodeReader->getChildrenPos(), nodeReader->getProbability(),
nodeReader->isTerminal() && !nodeReader->isDeleted(),
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
@@ -116,23 +116,23 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
if (!readingHelper.isMatchedCodePoint(
j, searchCodePoints[matchedCodePointCount + j])) {
// Different code point is found. The given word is not included in the dictionary.
- return NOT_A_VALID_WORD_POS;
+ return NOT_A_DICT_POS;
}
}
// All characters are matched.
if (length == readingHelper.getTotalCodePointCount()) {
// Terminal position is found.
- return nodeReader->getNodePos();
+ return nodeReader->getHeadPos();
}
if (!nodeReader->hasChildren()) {
- return NOT_A_VALID_WORD_POS;
+ return NOT_A_DICT_POS;
}
// Advance to the children nodes.
readingHelper.readChildNode();
}
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
- return NOT_A_VALID_WORD_POS;
+ return NOT_A_DICT_POS;
}
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
@@ -149,7 +149,7 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
}
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
@@ -162,7 +162,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos)
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
@@ -175,7 +175,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
}
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
@@ -209,12 +209,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
}
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_VALID_WORD_POS) {
+ if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_VALID_WORD_POS) {
+ if (word1Pos == NOT_A_DICT_POS) {
return false;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
@@ -230,12 +230,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
}
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
- if (word0Pos == NOT_A_VALID_WORD_POS) {
+ if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
- if (word1Pos == NOT_A_VALID_WORD_POS) {
+ if (word1Pos == NOT_A_DICT_POS) {
return false;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
@@ -243,4 +243,29 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
return writingHelper.removeBigramWords(word0Pos, word1Pos);
}
+void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ return;
+ }
+ // TODO: Implement.
+}
+
+void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ return;
+ }
+ // TODO: Implement.
+}
+
+bool DynamicPatriciaTriePolicy::needsToRunGC() const {
+ if (!mBuffer->isUpdatable()) {
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+ // TODO: Implement.
+ return false;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index cdab0e16a..2cbb0ff3b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
- mBigramListPolicy(&mBufferWithExtendableBuffer),
- mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
+ mShortcutListPolicy(&mBufferWithExtendableBuffer),
+ mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
@@ -85,14 +85,20 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
+ void flush(const char *const filePath);
+
+ void flushWithGC(const char *const filePath);
+
+ bool needsToRunGC() const;
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
- DynamicBigramListPolicy mBigramListPolicy;
DynamicShortcutListPolicy mShortcutListPolicy;
+ DynamicBigramListPolicy mBigramListPolicy;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index db1c392bb..120fd7699 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -72,8 +72,7 @@ class DynamicPatriciaTrieReadingHelper {
// Initialize reading state with the head position of a node.
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
- // TODO: Consolidate NOT_A_VALID_WORD_POS and NOT_A_DICT_POS
- if (nodePos == NOT_A_VALID_WORD_POS || nodePos == NOT_A_DICT_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
mPos = NOT_A_DICT_POS;
} else {
mIsError = false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 5d979fa51..8428c0b15 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -28,19 +28,26 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
+/* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer,
+ const int pos) {
+ int linkAddressPos = pos;
+ return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
+}
+
+/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+}
+
/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
- const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
- if ((flags & MASK_MOVED) == FLAG_IS_NOT_MOVED) {
- const int base = *pos;
- const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
- if (offset == 0) {
- // 0 offset means that the node does not have children.
- return NOT_A_DICT_POS;
- } else {
- return base + offset;
- }
- } else {
+ const uint8_t *const buffer, int *const pos) {
+ const int base = *pos;
+ const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ if (offset == 0) {
+ // 0 offset means that the node does not have children.
return NOT_A_DICT_POS;
+ } else {
+ return base + offset;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index 62d73bb02..db5f9b1bd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -20,7 +20,6 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -28,22 +27,15 @@ class DynamicPatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
- static AK_FORCE_INLINE int getForwardLinkPosition(const uint8_t *const buffer, const int pos) {
- int linkAddressPos = pos;
- return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos);
- }
+ static int getForwardLinkPosition(const uint8_t *const buffer, const int pos);
static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) {
return forwardLinkAddress != 0;
}
- static AK_FORCE_INLINE int getParentPosAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
- return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
- }
+ static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos);
- static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
- const NodeFlags flags, int *const pos);
+ static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
/**
* Node Flags
@@ -59,9 +51,9 @@ class DynamicPatriciaTrieReadingUtils {
static AK_FORCE_INLINE NodeFlags updateAndGetFlags(const NodeFlags originalFlags,
const bool isMoved, const bool isDeleted) {
NodeFlags flags = originalFlags;
- flags = isMoved ? ((flags & (!MASK_MOVED)) | FLAG_IS_MOVED) : flags;
- flags = isDeleted ? ((flags & (!MASK_MOVED)) | FLAG_IS_DELETED) : flags;
- flags = (!isMoved && !isDeleted) ? ((flags & (!MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
+ flags = isMoved ? ((flags & (~MASK_MOVED)) | FLAG_IS_MOVED) : flags;
+ flags = isDeleted ? ((flags & (~MASK_MOVED)) | FLAG_IS_DELETED) : flags;
+ flags = (!isMoved && !isDeleted) ? ((flags & (~MASK_MOVED)) | FLAG_IS_NOT_MOVED) : flags;
return flags;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 99a983f21..311d31e5d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -26,10 +26,12 @@
namespace latinime {
+const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability) {
- int parentPos = NOT_A_VALID_WORD_POS;
+ int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
if (!readingHelper->isMatchedCodePoint(0 /* index */,
@@ -63,7 +65,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
codePointCount - readingHelper->getTotalCodePointCount());
}
// Advance to the children nodes.
- parentPos = nodeReader->getNodePos();
+ parentPos = nodeReader->getHeadPos();
readingHelper->readChildNode();
}
if (readingHelper->isError()) {
@@ -79,29 +81,60 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
const int probability) {
+ int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBuffer(word0Pos);
- if (nodeReader.isDeleted()) {
+ nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
+ mMergedNodeCodePoints);
+ // Move node to add bigram entry.
+ const int newNodePos = mBuffer->getTailPosition();
+ if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
return false;
}
- // TODO: Implement.
- return false;
+ int writingPos = newNodePos;
+ // Write a new PtNode using original PtNode's info to the tail of the dictionary.
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(),
+ mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
+ &writingPos)) {
+ return false;
+ }
+ nodeReader.fetchNodeInfoFromBuffer(newNodePos);
+ if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
+ // Insert a new bigram entry into the existing bigram list.
+ int bigramListPos = nodeReader.getBigramsPos();
+ return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos);
+ } else {
+ // The PtNode doesn't have a bigram list.
+ // First, Write a bigram entry at the tail position of the PtNode.
+ if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
+ return false;
+ }
+ // Then, Mark as the PtNode having bigram list in the flags.
+ const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
+ nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
+ nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
+ nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
+ writingPos = newNodePos;
+ // Write updated flags into the moved PtNode's flags field.
+ return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
+ &writingPos);
+ }
}
// Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
- if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
+ if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
- // TODO: Implement.
- return false;
+ return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) {
- int pos = originalNode->getNodePos();
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
+ const int bigramLinkedNodePos) {
+ int pos = originalNode->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
@@ -113,18 +146,42 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
- int writingPos = originalNode->getNodePos();
+ int writingPos = originalNode->getHeadPos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent offset field.
- const int movedPosOffset = movedPos - originalNode->getNodePos();
+ const int movedPosOffset = movedPos - originalNode->getHeadPos();
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
mBuffer, movedPosOffset, &writingPos)) {
return false;
}
+ // Update bigram linked node position, which is stored in the children position field.
+ int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
+ return false;
+ }
+ if (originalNode->hasChildren()) {
+ // Update children's parent position.
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos());
+ while (!readingHelper.isEnd()) {
+ const int childPtNodeWrittenPos = nodeReader->getHeadPos();
+ const int parentOffset = movedPos - childPtNodeWrittenPos;
+ int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
+ mBuffer, parentOffset, &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up to update dictionary.
+ return false;
+ }
+ readingHelper.readNextSiblingNode();
+ }
+ }
return true;
}
@@ -135,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
const int originalBigramListPos, const int originalShortcutListPos,
int *const writingPos) {
const int nodePos = *writingPos;
- // Create node flags and write them.
- const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
- PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
- probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
- originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
- 3 /* childrenPositionFieldSize */);
- if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
+ // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
+ // PtNode writing.
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
writingPos)) {
return false;
}
@@ -154,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
codePointCount, writingPos)) {
- return false;;
+ return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
@@ -177,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
+ int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos;
- if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
+ if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
return false;
}
}
+ // Create node flags and write them.
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags =
+ PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
+ probability != NOT_A_PROBABILITY /* isTerminal */,
+ originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
+ bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ int flagsFieldPos = nodePos;
+ if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
+ &flagsFieldPos)) {
+ return false;
+ }
return true;
}
@@ -230,7 +296,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
} else {
// Make the node terminal and write the probability.
int movedPos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) {
+ if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
@@ -250,7 +316,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
newPtNodeArrayPos, &childrenPosFieldPos)) {
return false;
}
- return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints,
+ return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
codePointCount, probability);
}
@@ -287,11 +353,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
- const int firstPtNodePos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) {
- return false;
- }
- int writingPos = firstPtNodePos;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
@@ -307,15 +370,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
return false;
}
// Write the 2nd part of the reallocating node.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode,
- reallocatingPtNode->getNodePos(),
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
- if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(),
+ if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
@@ -326,9 +389,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
+ // Update original reallocatingPtNode as moved.
+ if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos);
+ nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
// Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index ada634a54..20e35abcf 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -49,12 +49,14 @@ class DynamicPatriciaTrieWritingHelper {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
+ static const int CHILDREN_POSITION_FIELD_SIZE;
+
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy;
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
- const int movedPos);
+ const int movedPos, const int bigramLinkedNodePos);
bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
const int parentPos, const int *const codePoints, const int codePointCount,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
index d5a83a938..e6cff431b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp
@@ -219,7 +219,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
}
// This function gets the position of the terminal node of the exact matching word in the
-// dictionary. If no match is found, it returns NOT_A_VALID_WORD_POS.
+// dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition();
@@ -228,7 +228,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
while (true) {
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
- if (wordPos >= length) return NOT_A_VALID_WORD_POS;
+ if (wordPos >= length) return NOT_A_DICT_POS;
int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
&pos);
const int wChar = forceLowerCaseSearch
@@ -236,7 +236,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
while (true) {
// If there are no more PtNodes in this array, it means we could not
// find a matching character for this depth, therefore there is no match.
- if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS;
+ if (0 >= ptNodeCount) return NOT_A_DICT_POS;
const int ptNodePos = pos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
@@ -245,7 +245,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
if (character == wChar) {
// This is the correct PtNode. Only one PtNode may start with the same char within
// a PtNode array, so either we found our match in this array, or there is
- // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the
+ // no match and we can return NOT_A_DICT_POS. So we will check all the
// characters in this PtNode indeed does match.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
@@ -256,8 +256,8 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
// character that does not match, as explained above, it means the word is
// not in the dictionary (by virtue of this PtNode being the only one to
// match the word on the first character, but not matching the whole word).
- if (wordPos >= length) return NOT_A_VALID_WORD_POS;
- if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS;
+ if (wordPos >= length) return NOT_A_DICT_POS;
+ if (inWord[wordPos] != character) return NOT_A_DICT_POS;
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
mDictRoot, &pos);
}
@@ -274,7 +274,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
- return NOT_A_VALID_WORD_POS;
+ return NOT_A_DICT_POS;
}
// We have children and we are still shorter than the word we are searching for, so
// we need to traverse children. Put the pointer on the children position, and
@@ -320,7 +320,7 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
}
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
int pos = nodePos;
@@ -342,7 +342,7 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
}
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
int pos = nodePos;
@@ -362,7 +362,7 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
}
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
- if (nodePos == NOT_A_VALID_WORD_POS) {
+ if (nodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
int pos = nodePos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index 75d976205..cee3e4ab2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -96,6 +96,22 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
+ void flush(const char *const filePath) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: flush() is called for non-updatable dictionary.");
+ }
+
+ void flushWithGC(const char *const filePath) {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
+ }
+
+ bool needsToRunGC() const {
+ // This method should not be called for non-updatable dictionary.
+ AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
+ return false;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
index 576a158bc..1316b425f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp
@@ -42,6 +42,63 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
// Flag for blacklist
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
+/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ if (firstByte < 0x80) {
+ return firstByte;
+ } else {
+ return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition(
+ buffer, pos);
+ }
+}
+
+/* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition(
+ const uint8_t *const buffer, int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
+}
+
+// Returns the number of read characters.
+/* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer,
+ const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
+ int length = 0;
+ if (hasMultipleChars(flags)) {
+ length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
+ pos);
+ } else {
+ if (maxLength > 0) {
+ outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos);
+ length = 1;
+ }
+ }
+ return length;
+}
+
+// Returns the number of skipped characters.
+/* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const pos) {
+ if (hasMultipleChars(flags)) {
+ return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
+ } else {
+ if (maxLength > 0) {
+ getCodePointAndAdvancePosition(buffer, pos);
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+}
+
+/* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+}
+
/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
const int base = *pos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
index 2b0646db2..8420ee95a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h
@@ -20,7 +20,6 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -28,62 +27,21 @@ class PatriciaTrieReadingUtils {
public:
typedef uint8_t NodeFlags;
- static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition(
- const uint8_t *const buffer, int *const pos) {
- const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
- if (firstByte < 0x80) {
- return firstByte;
- } else {
- return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition(
- buffer, pos);
- }
- }
+ static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
- static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
- }
+ static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
- static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
- return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
- }
+ static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
// Returns the number of read characters.
- static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer,
- const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
- int length = 0;
- if (hasMultipleChars(flags)) {
- length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
- pos);
- } else {
- if (maxLength > 0) {
- outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos);
- length = 1;
- }
- }
- return length;
- }
+ static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const outBuffer, int *const pos);
// Returns the number of skipped characters.
- static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
- const int maxLength, int *const pos) {
- if (hasMultipleChars(flags)) {
- return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
- } else {
- if (maxLength > 0) {
- getCodePointAndAdvancePosition(buffer, pos);
- return 1;
- } else {
- return 0;
- }
- }
- }
+ static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
+ const int maxLength, int *const pos);
- static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer,
- int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
- }
+ static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
const NodeFlags flags, int *const pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
index e70bb5071..847dcdee5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
@@ -16,6 +16,8 @@
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
namespace latinime {
// Flag for presence of more attributes
@@ -28,4 +30,22 @@ const int ShortcutListReadingUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
// The numeric value of the shortcut probability that means 'whitelist'.
const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
+/* static */ ShortcutListReadingUtils::ShortcutFlags
+ ShortcutListReadingUtils::getFlagsAndForwardPointer(const uint8_t *const dictRoot,
+ int *const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
+}
+
+/* static */ int ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(
+ const uint8_t *const dictRoot, int *const pos) {
+ // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
+ return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos)
+ - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+}
+
+/* static */ int ShortcutListReadingUtils::readShortcutTarget(
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
+ return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
index 5f4f240f5..a83ed5a50 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
@@ -20,7 +20,6 @@
#include <stdint.h>
#include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
@@ -28,10 +27,7 @@ class ShortcutListReadingUtils {
public:
typedef uint8_t ShortcutFlags;
- static AK_FORCE_INLINE ShortcutFlags getFlagsAndForwardPointer(
- const uint8_t *const dictRoot, int *const pos) {
- return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
- }
+ static ShortcutFlags getFlagsAndForwardPointer(const uint8_t *const dictRoot, int *const pos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const ShortcutFlags flags) {
return flags & MASK_ATTRIBUTE_PROBABILITY;
@@ -43,12 +39,7 @@ class ShortcutListReadingUtils {
// This method returns the size of the shortcut list region excluding the shortcut list size
// field at the beginning.
- static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
- const uint8_t *const dictRoot, int *const pos) {
- // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
- return ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos)
- - SHORTCUT_LIST_SIZE_FIELD_SIZE;
- }
+ static int getShortcutListSizeAndForwardPointer(const uint8_t *const dictRoot, int *const pos);
static AK_FORCE_INLINE int getShortcutListSizeFieldSize() {
return SHORTCUT_LIST_SIZE_FIELD_SIZE;
@@ -63,11 +54,8 @@ class ShortcutListReadingUtils {
return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
}
- static AK_FORCE_INLINE int readShortcutTarget(
- const uint8_t *const dictRoot, const int maxLength, int *const outWord,
- int *const pos) {
- return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
- }
+ static int readShortcutTarget(const uint8_t *const dictRoot, const int maxLength,
+ int *const outWord, int *const pos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutListReadingUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index dfdaebd18..0fed275e9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -76,7 +76,7 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
}
}
mUsedAdditionalBufferSize += size;
- } else if (pos + size >= tailPosition) {
+ } else if (pos + size > tailPosition) {
// The access will beyond the tail of used region.
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
index a84cfb9d5..1833e8832 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.cpp
@@ -18,7 +18,8 @@
namespace latinime {
-const uint8_t ByteArrayUtils::MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+const uint8_t ByteArrayUtils::MINIMUM_ONE_BYTE_CHARACTER_VALUE = 0x20;
+const uint8_t ByteArrayUtils::MAXIMUM_ONE_BYTE_CHARACTER_VALUE = 0xFF;
const uint8_t ByteArrayUtils::CHARACTER_ARRAY_TERMINATOR = 0x1F;
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index f727ecf8e..0c1576818 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -135,7 +135,7 @@ class ByteArrayUtils {
static AK_FORCE_INLINE int readCodePointAndAdvancePosition(
const uint8_t *const buffer, int *const pos) {
const uint8_t firstByte = readUint8(buffer, *pos);
- if (firstByte < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
+ if (firstByte < MINIMUM_ONE_BYTE_CHARACTER_VALUE) {
if (firstByte == CHARACTER_ARRAY_TERMINATOR) {
*pos += 1;
return NOT_A_CODE_POINT;
@@ -172,6 +172,7 @@ class ByteArrayUtils {
int codePoint = readCodePointAndAdvancePosition(buffer, pos);
while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ length++;
}
return length;
}
@@ -186,7 +187,8 @@ class ByteArrayUtils {
const int codePoint = codePoints[i];
if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
break;
- } else if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
+ } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+ || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
// three bytes character.
writeUint24AndAdvancePosition(buffer, codePoint, pos);
} else {
@@ -206,7 +208,8 @@ class ByteArrayUtils {
const int codePoint = codePoints[i];
if (codePoint == NOT_A_CODE_POINT || codePoint == CHARACTER_ARRAY_TERMINATOR) {
break;
- } else if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
+ } else if (codePoint < MINIMUM_ONE_BYTE_CHARACTER_VALUE
+ || codePoint > MAXIMUM_ONE_BYTE_CHARACTER_VALUE) {
// three bytes character.
byteCount += 3;
} else {
@@ -224,7 +227,8 @@ class ByteArrayUtils {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils);
- static const uint8_t MINIMAL_ONE_BYTE_CHARACTER_VALUE;
+ static const uint8_t MINIMUM_ONE_BYTE_CHARACTER_VALUE;
+ static const uint8_t MAXIMUM_ONE_BYTE_CHARACTER_VALUE;
static const uint8_t CHARACTER_ARRAY_TERMINATOR;
static AK_FORCE_INLINE void writeUint32AndAdvancePosition(uint8_t *const buffer,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6febd7832..6b69116eb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -29,8 +29,8 @@ namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int pathLength,
- const int bufferOffset, const int bufferSize, const bool isUpdatable) {
+ static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
+ const int bufferSize, const bool isUpdatable) {
const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
const int mmapFd = open(path, openMode);
if (mmapFd < 0) {