aboutsummaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
Diffstat (limited to 'native')
-rw-r--r--native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp7
-rw-r--r--native/jni/src/defines.h2
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp5
-rw-r--r--native/jni/src/suggest/core/dicnode/dic_nodes_cache.h21
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.cpp5
-rw-r--r--native/jni/src/suggest/core/session/dic_traverse_session.h15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp72
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h45
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h27
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp18
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp59
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h1
17 files changed, 201 insertions, 99 deletions
diff --git a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
index 72e625836..386643332 100644
--- a/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
+++ b/native/jni/com_android_inputmethod_latin_DicTraverseSession.cpp
@@ -25,8 +25,9 @@
namespace latinime {
class Dictionary;
-static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring localeJStr) {
- void *traverseSession = DicTraverseSession::getSessionInstance(env, localeJStr);
+static jlong latinime_setDicTraverseSession(JNIEnv *env, jclass clazz, jstring localeJStr,
+ jlong dictSize) {
+ void *traverseSession = DicTraverseSession::getSessionInstance(env, localeJStr, dictSize);
return reinterpret_cast<jlong>(traverseSession);
}
@@ -53,7 +54,7 @@ static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("setDicTraverseSessionNative"),
- const_cast<char *>("(Ljava/lang/String;)J"),
+ const_cast<char *>("(Ljava/lang/String;J)J"),
reinterpret_cast<void *>(latinime_setDicTraverseSession)
},
{
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 07f1e52c6..4605890c7 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -32,8 +32,6 @@
#define MAX_WORD_LENGTH 48
// Must be equal to BinaryDictionary.MAX_RESULTS in Java
#define MAX_RESULTS 18
-// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ...
-#define MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY 310
// Must be equal to ProximityInfo.MAX_PROXIMITY_CHARS_SIZE in Java
#define MAX_PROXIMITY_CHARS_SIZE 16
#define ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE 2
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
index c3d2a2e74..b6be47e90 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.cpp
@@ -23,6 +23,11 @@
namespace latinime {
+// The biggest value among MAX_CACHE_DIC_NODE_SIZE, MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT, ...
+const int DicNodesCache::LARGE_PRIORITY_QUEUE_CAPACITY = 310;
+// Capacity for reducing memory footprint.
+const int DicNodesCache::SMALL_PRIORITY_QUEUE_CAPACITY = 100;
+
/**
* Truncates all of the dicNodes so that they start at the given commit point.
* Only called for multi-word typing input.
diff --git a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
index f085848aa..8493b6a8b 100644
--- a/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
+++ b/native/jni/src/suggest/core/dicnode/dic_nodes_cache.h
@@ -31,10 +31,11 @@ class DicNode;
*/
class DicNodesCache {
public:
- AK_FORCE_INLINE DicNodesCache()
- : mDicNodePriorityQueue0(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY),
- mDicNodePriorityQueue1(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY),
- mDicNodePriorityQueue2(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY),
+ AK_FORCE_INLINE explicit DicNodesCache(const bool usesLargeCapacityCache)
+ : mUsesLargeCapacityCache(usesLargeCapacityCache),
+ mDicNodePriorityQueue0(getCacheCapacity()),
+ mDicNodePriorityQueue1(getCacheCapacity()),
+ mDicNodePriorityQueue2(getCacheCapacity()),
mDicNodePriorityQueueForTerminal(MAX_RESULTS),
mActiveDicNodes(&mDicNodePriorityQueue0),
mNextActiveDicNodes(&mDicNodePriorityQueue1),
@@ -50,7 +51,8 @@ class DicNodesCache {
// We want to use the max capacity for the current active dic node queue.
mActiveDicNodes->clearAndResizeToCapacity();
// nextActiveSize is used to limit the next iteration's active dic node size.
- mNextActiveDicNodes->clearAndResize(nextActiveSize);
+ const int nextActiveSizeFittingToTheCapacity = min(nextActiveSize, getCacheCapacity());
+ mNextActiveDicNodes->clearAndResize(nextActiveSizeFittingToTheCapacity);
mTerminalDicNodes->clearAndResize(terminalSize);
// We want to use the max capacity for the cached dic nodes that will be used for the
// continuous suggestion.
@@ -162,12 +164,21 @@ class DicNodesCache {
return tmp;
}
+ AK_FORCE_INLINE int getCacheCapacity() const {
+ return mUsesLargeCapacityCache ?
+ LARGE_PRIORITY_QUEUE_CAPACITY : SMALL_PRIORITY_QUEUE_CAPACITY;
+ }
+
AK_FORCE_INLINE void resetTemporaryCaches() {
mActiveDicNodes->clear();
mNextActiveDicNodes->clear();
mTerminalDicNodes->clear();
}
+ static const int LARGE_PRIORITY_QUEUE_CAPACITY;
+ static const int SMALL_PRIORITY_QUEUE_CAPACITY;
+
+ const bool mUsesLargeCapacityCache;
// Instances
DicNodePriorityQueue mDicNodePriorityQueue0;
DicNodePriorityQueue mDicNodePriorityQueue1;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
index e7b386b2d..2c2259214 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp
@@ -23,6 +23,11 @@
namespace latinime {
+// 256K bytes threshold is heuristically used to distinguish dictionaries containing many unigrams
+// (e.g. main dictionary) from small dictionaries (e.g. contacts...)
+const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
+ 256 * 1024;
+
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
int prevWordLength, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary;
diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h
index b25580b96..fe8893590 100644
--- a/native/jni/src/suggest/core/session/dic_traverse_session.h
+++ b/native/jni/src/suggest/core/session/dic_traverse_session.h
@@ -37,8 +37,12 @@ class DicTraverseSession {
public:
// A factory method for DicTraverseSession
- static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr) {
- return new DicTraverseSession(env, localeStr);
+ static AK_FORCE_INLINE void *getSessionInstance(JNIEnv *env, jstring localeStr,
+ jlong dictSize) {
+ // To deal with the trade-off between accuracy and memory space, large cache is used for
+ // dictionaries larger that the threshold
+ return new DicTraverseSession(env, localeStr,
+ dictSize >= DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION);
}
static AK_FORCE_INLINE void initSessionInstance(DicTraverseSession *traverseSession,
@@ -54,10 +58,10 @@ class DicTraverseSession {
delete traverseSession;
}
- AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr)
+ AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
: mPrevWordPos(NOT_A_VALID_WORD_POS), mProximityInfo(0),
- mDictionary(0), mSuggestOptions(0), mDicNodesCache(), mMultiBigramMap(),
- mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
+ mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
+ mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
// NOTE: mProximityInfoStates is an array of instances.
// No need to initialize it explicitly here.
@@ -181,6 +185,7 @@ class DicTraverseSession {
DISALLOW_IMPLICIT_CONSTRUCTORS(DicTraverseSession);
// threshold to start caching
static const int CACHE_START_INPUT_LENGTH_THRESHOLD;
+ static const int DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION;
void initializeProximityInfoStates(const int *const inputCodePoints, const int *const inputXs,
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 936dc9c5d..4ee138125 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -18,6 +18,42 @@
namespace latinime {
+const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
+
+void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
+ bool *const outHasNext, int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ const BigramListReadWriteUtils::BigramFlags flags =
+ BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ buffer, flags, pos);
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
+ }
+ *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
+ *outHasNext = BigramListReadWriteUtils::hasNext(flags);
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+}
+
+void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
+ const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
+ const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
+ if (usesAdditionalBuffer) {
+ *pos -= mBuffer->getOriginalBufferSize();
+ }
+ BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
+ if (usesAdditionalBuffer) {
+ *pos += mBuffer->getOriginalBufferSize();
+ }
+}
+
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
@@ -28,15 +64,16 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
- int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, fromPos);
- if (bigramPos == NOT_A_VALID_WORD_POS) {
+ if (originalBigramPos == NOT_A_VALID_WORD_POS) {
// skip invalid bigram entry.
continue;
}
if (usesAdditionalBuffer) {
- bigramPos += mBuffer->getOriginalBufferSize();
+ originalBigramPos += mBuffer->getOriginalBufferSize();
}
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
BigramListReadWriteUtils::BigramFlags newBigramFlags;
uint32_t newBigramOffset;
int newBigramOffsetFieldSize;
@@ -133,11 +170,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
if (usesAdditionalBuffer) {
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
}
- int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
+ int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, &pos);
- if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) {
- bigramPos += mBuffer->getOriginalBufferSize();
+ if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
+ originalBigramPos += mBuffer->getOriginalBufferSize();
}
+ const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
if (bigramPos != targetBigramPos) {
continue;
}
@@ -152,4 +190,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
return false;
}
+int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
+ const int originalBigramPos) const {
+ if (originalBigramPos == NOT_A_VALID_WORD_POS) {
+ return NOT_A_VALID_WORD_POS;
+ }
+ int currentPos = originalBigramPos;
+ DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
+ nodeReader.fetchNodeInfoFromBuffer(currentPos);
+ int bigramLinkCount = 0;
+ while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
+ currentPos = nodeReader.getBigramLinkedNodePos();
+ nodeReader.fetchNodeInfoFromBuffer(currentPos);
+ bigramLinkCount++;
+ if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
+ AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
+ ASSERT(false);
+ return NOT_A_VALID_WORD_POS;
+ }
+ }
+ return currentPos;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index b7c05376d..e451e313d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -21,7 +21,9 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@@ -31,43 +33,16 @@ namespace latinime {
*/
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer)
- : mBuffer(buffer) {}
+ DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
+ : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
- int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- const BigramListReadWriteUtils::BigramFlags flags =
- BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
- *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
- buffer, flags, pos);
- if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) {
- *outBigramPos += mBuffer->getOriginalBufferSize();
- }
- *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
- *outHasNext = BigramListReadWriteUtils::hasNext(flags);
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
+ int *const pos) const;
- void skipAllBigrams(int *const pos) const {
- const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
- const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
- if (usesAdditionalBuffer) {
- *pos -= mBuffer->getOriginalBufferSize();
- }
- BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
- if (usesAdditionalBuffer) {
- *pos += mBuffer->getOriginalBufferSize();
- }
- }
+ void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
// positions after bigram lists. This method skips invalid bigram entries.
@@ -81,7 +56,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
+ static const int BIGRAM_LINK_COUNT_LIMIT;
+
BufferWithExtendableBuffer *const mBuffer;
+ const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
+
+ // Follow bigram link and return the position of bigram target PtNode that is currently valid.
+ int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index 5674cb48e..56ef60ae4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -28,6 +28,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = nodePos;
+ mHeadPos = nodePos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
@@ -57,10 +58,15 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
}
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
- dictBuf, mFlags, &pos);
+ dictBuf, &pos);
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
mChildrenPos += mBuffer->getOriginalBufferSize();
}
+ if (mSiblingPos == NOT_A_VALID_WORD_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
+ mBigramLinkedNodePos = mChildrenPos;
+ } else {
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ }
if (usesAdditionalBuffer) {
pos += mBuffer->getOriginalBufferSize();
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
index 2ee7c2495..89d38a590 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h
@@ -39,11 +39,11 @@ class DynamicPatriciaTrieNodeReader {
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
- mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
- mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
- mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
- mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
- mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
+ mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_VALID_WORD_POS), mFlags(0),
+ mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
+ mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
+ mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
+ mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
mSiblingPos(NOT_A_VALID_WORD_POS) {}
~DynamicPatriciaTrieNodeReader() {}
@@ -56,13 +56,14 @@ class DynamicPatriciaTrieNodeReader {
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
- mNodePos = nodePos;
mSiblingPos = NOT_A_VALID_WORD_POS;
- fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints);
+ mBigramLinkedNodePos = NOT_A_DICT_POS;
+ fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
}
- AK_FORCE_INLINE int getNodePos() const {
- return mNodePos;
+ // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
+ AK_FORCE_INLINE int getHeadPos() const {
+ return mHeadPos;
}
// Flags
@@ -114,6 +115,11 @@ class DynamicPatriciaTrieNodeReader {
return mChildrenPos;
}
+ // Bigram linked node position.
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const {
+ return mBigramLinkedNodePos;
+ }
+
// Shortcutlist position
AK_FORCE_INLINE int getShortcutPos() const {
return mShortcutPos;
@@ -135,7 +141,7 @@ class DynamicPatriciaTrieNodeReader {
const BufferWithExtendableBuffer *const mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
- int mNodePos;
+ int mHeadPos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos;
uint8_t mCodePointCount;
@@ -143,6 +149,7 @@ class DynamicPatriciaTrieNodeReader {
int mProbability;
int mChildrenPosFieldPos;
int mChildrenPos;
+ int mBigramLinkedNodePos;
int mShortcutPos;
int mBigramPos;
int mSiblingPos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 945677b50..ece1781f1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
- childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
+ childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
nodeReader->getChildrenPos(), nodeReader->getProbability(),
nodeReader->isTerminal() && !nodeReader->isDeleted(),
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
@@ -122,7 +122,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
// All characters are matched.
if (length == readingHelper.getTotalCodePointCount()) {
// Terminal position is found.
- return nodeReader->getNodePos();
+ return nodeReader->getHeadPos();
}
if (!nodeReader->hasChildren()) {
return NOT_A_VALID_WORD_POS;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index cdab0e16a..50c724012 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
- mBigramListPolicy(&mBufferWithExtendableBuffer),
- mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
+ mShortcutListPolicy(&mBufferWithExtendableBuffer),
+ mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
@@ -91,8 +91,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
- DynamicBigramListPolicy mBigramListPolicy;
DynamicShortcutListPolicy mShortcutListPolicy;
+ DynamicBigramListPolicy mBigramListPolicy;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
index 5d979fa51..c7e89fff8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp
@@ -29,18 +29,14 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40;
const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80;
/* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition(
- const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
- if ((flags & MASK_MOVED) == FLAG_IS_NOT_MOVED) {
- const int base = *pos;
- const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
- if (offset == 0) {
- // 0 offset means that the node does not have children.
- return NOT_A_DICT_POS;
- } else {
- return base + offset;
- }
- } else {
+ const uint8_t *const buffer, int *const pos) {
+ const int base = *pos;
+ const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
+ if (offset == 0) {
+ // 0 offset means that the node does not have children.
return NOT_A_DICT_POS;
+ } else {
+ return base + offset;
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
index 2e604a202..5a2ad9cb9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h
@@ -42,8 +42,7 @@ class DynamicPatriciaTrieReadingUtils {
return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos);
}
- static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
- const NodeFlags flags, int *const pos);
+ static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos);
/**
* Node Flags
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 7c0b6286c..dbc80f66a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -63,7 +63,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
codePointCount - readingHelper->getTotalCodePointCount());
}
// Advance to the children nodes.
- parentPos = nodeReader->getNodePos();
+ parentPos = nodeReader->getHeadPos();
readingHelper->readChildNode();
}
if (readingHelper->isError()) {
@@ -100,8 +100,9 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
- const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) {
- int pos = originalNode->getNodePos();
+ const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
+ const int bigramLinkedNodePos) {
+ int pos = originalNode->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
@@ -113,18 +114,42 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
- int writingPos = originalNode->getNodePos();
+ int writingPos = originalNode->getHeadPos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent offset field.
- const int movedPosOffset = movedPos - originalNode->getNodePos();
+ const int movedPosOffset = movedPos - originalNode->getHeadPos();
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
mBuffer, movedPosOffset, &writingPos)) {
return false;
}
+ // Update bigram linked node position, which is stored in the children position field.
+ int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
+ if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
+ mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
+ return false;
+ }
+ if (originalNode->hasChildren()) {
+ // Update children's parent position.
+ DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
+ const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
+ readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos());
+ while (!readingHelper.isEnd()) {
+ const int childPtNodeWrittenPos = nodeReader->getHeadPos();
+ const int parentOffset = movedPos - childPtNodeWrittenPos;
+ int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */;
+ if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
+ mBuffer, parentOffset, &parentOffsetFieldPos)) {
+ // Parent offset cannot be written because of a bug or a broken dictionary; thus,
+ // we give up to update dictionary.
+ return false;
+ }
+ readingHelper.readNextSiblingNode();
+ }
+ }
return true;
}
@@ -230,7 +255,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
} else {
// Make the node terminal and write the probability.
int movedPos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) {
+ if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
@@ -250,7 +275,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
newPtNodeArrayPos, &childrenPosFieldPos)) {
return false;
}
- return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints,
+ return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
codePointCount, probability);
}
@@ -287,11 +312,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
- const int firstPtNodePos = mBuffer->getTailPosition();
- if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) {
- return false;
- }
- int writingPos = firstPtNodePos;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
@@ -307,15 +329,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
return false;
}
// Write the 2nd part of the reallocating node.
- if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode,
- reallocatingPtNode->getNodePos(),
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
- if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(),
+ if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
@@ -326,9 +348,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
+ // Update original reallocatingPtNode as moved.
+ if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
- nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos);
+ nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
// Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index ada634a54..e1b9d2e75 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -54,7 +54,7 @@ class DynamicPatriciaTrieWritingHelper {
DynamicShortcutListPolicy *const mShortcutPolicy;
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
- const int movedPos);
+ const int movedPos, const int bigramLinkedNodePos);
bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
const int parentPos, const int *const codePoints, const int codePointCount,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index f727ecf8e..6bafb64ee 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -172,6 +172,7 @@ class ByteArrayUtils {
int codePoint = readCodePointAndAdvancePosition(buffer, pos);
while (NOT_A_CODE_POINT != codePoint && length < maxLength) {
codePoint = readCodePointAndAdvancePosition(buffer, pos);
+ length++;
}
return length;
}